From da72249a8edfbafbf76cd2f5f874724e3c6b9e6d Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Wed, 15 Apr 2020 15:49:39 -0500 Subject: [PATCH 01/11] added option to generate a simple JSON report --- floss/main.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/floss/main.py b/floss/main.py index e1c56b0da..2ad145b17 100644 --- a/floss/main.py +++ b/floss/main.py @@ -139,6 +139,8 @@ def make_parser(): parser.add_option("-f", "--functions", dest="functions", help="only analyze the specified functions (comma-separated)", type="string") + parser.add_option("-o", "--output-json", dest="json_output_file", + help="save analysis output as a JSON document") parser.add_option("--save-workspace", dest="save_workspace", help="save vivisect .viv workspace file in current directory", action="store_true") parser.add_option("-m", "--show-metainfo", dest="should_show_metainfo", @@ -848,6 +850,24 @@ def create_r2_script(sample_file_path, r2_script_file, decoded_strings, stack_st # TODO return, catch exception in main() +def create_json_output(json_file_path, sample_file_path, decoded_strings, stack_strings): + """ + Create a report of the analysis performed by FLOSS + :param json_file_path: path to write the report + :param sample_file_path: path of the sample analyzed + :param decoded_strings: list of decoded strings ([DecodedString]) + :param stack_strings: list of stack strings ([StackString]) + """ + results = {'stack_strings': [sanitize_string_for_printing(ss.s) for ss in stack_strings], + 'decoded_strings': [sanitize_string_for_printing(ds.s) for ds in decoded_strings]} + report = {'file_path': sample_file_path, 'results': results} + try: + with open(json_file_path, 'w') as f: + json.dump(report, f) + except Exception: + raise + + def print_static_strings(path, min_length, quiet=False): """ Print static ASCII and UTF-16 strings from provided file. 
@@ -1093,6 +1113,10 @@ def main(argv=None): if not options.quiet: print("\nFinished execution after %f seconds" % (time1 - time0)) + if options.json_output_file: + create_json_output(options.json_output_file, sample_file_path, decoded_strings, stack_strings) + floss_logger.info("Wrote JSON file to %s\n" % options.json_output_file) + return 0 From 04925e9e76897d63d6661e28573cb4018273a67b Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Tue, 21 Apr 2020 10:48:22 -0500 Subject: [PATCH 02/11] json_output updates --- floss/main.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/floss/main.py b/floss/main.py index 2ad145b17..9d1c6e825 100644 --- a/floss/main.py +++ b/floss/main.py @@ -850,20 +850,23 @@ def create_r2_script(sample_file_path, r2_script_file, decoded_strings, stack_st # TODO return, catch exception in main() -def create_json_output(json_file_path, sample_file_path, decoded_strings, stack_strings): +def create_json_output(options, sample_file_path, decoded_strings, stack_strings): """ Create a report of the analysis performed by FLOSS - :param json_file_path: path to write the report + :param options: parsed options :param sample_file_path: path of the sample analyzed :param decoded_strings: list of decoded strings ([DecodedString]) :param stack_strings: list of stack strings ([StackString]) """ - results = {'stack_strings': [sanitize_string_for_printing(ss.s) for ss in stack_strings], + strings = {'stack_strings': [sanitize_string_for_printing(ss.s) for ss in stack_strings], 'decoded_strings': [sanitize_string_for_printing(ds.s) for ds in decoded_strings]} - report = {'file_path': sample_file_path, 'results': results} + metadata = {'file_path': sample_file_path, + 'stack_strings': not options.no_stack_strings, + 'decoded_strings': not options.no_decoded_strings, + 'static_strings': not options.no_static_strings} try: - with open(json_file_path, 'w') as f: - json.dump(report, f) + with open(options.json_file_path, 'w') as 
f: + json.dump({'metadata': metadata, 'strings': strings}, f) except Exception: raise @@ -1114,7 +1117,7 @@ def main(argv=None): print("\nFinished execution after %f seconds" % (time1 - time0)) if options.json_output_file: - create_json_output(options.json_output_file, sample_file_path, decoded_strings, stack_strings) + create_json_output(options, sample_file_path, decoded_strings, stack_strings) floss_logger.info("Wrote JSON file to %s\n" % options.json_output_file) return 0 From 1e0d036be4a44de4a1944ff4e0c24b2904e4bbd2 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Tue, 21 Apr 2020 11:11:15 -0500 Subject: [PATCH 03/11] improve static string handling --- floss/main.py | 79 +++++++++++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 41 deletions(-) diff --git a/floss/main.py b/floss/main.py index 9d1c6e825..090d13d0e 100644 --- a/floss/main.py +++ b/floss/main.py @@ -10,6 +10,7 @@ import string import logging from time import time +from itertools import chain from optparse import OptionParser, OptionGroup import tabulate @@ -871,52 +872,38 @@ def create_json_output(options, sample_file_path, decoded_strings, stack_strings raise -def print_static_strings(path, min_length, quiet=False): +def get_file_as_mmap(path): + """ + Returns an mmap object of the file + :param path: path of the file to map + """ + with open(path, 'rb') as f: + return mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) + + +def print_static_strings(file_buf, min_length, quiet=False): """ Print static ASCII and UTF-16 strings from provided file. 
- :param path: input file + :param file_buf: the file buffer :param min_length: minimum string length :param quiet: print strings only, suppresses headers """ - with open(path, "rb") as f: - b = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) - - if os.path.getsize(path) > MAX_FILE_SIZE: - # for large files, there might be a huge number of strings, - # so don't worry about forming everything into a perfect table - if not quiet: - print("FLOSS static ASCII strings") - for s in strings.extract_ascii_strings(b, n=min_length): - print("%s" % s.s) - if not quiet: - print("") - - if not quiet: - print("FLOSS static Unicode strings") - for s in strings.extract_unicode_strings(b, n=min_length): - print("%s" % s.s) - if not quiet: - print("") - - if os.path.getsize(path) > sys.maxsize: - floss_logger.warning("File too large, strings listings may be truncated.") - floss_logger.warning("FLOSS cannot handle files larger than 4GB on 32bit systems.") + static_ascii_strings = strings.extract_ascii_strings(file_buf, min_length) + static_unicode_strings = strings.extract_unicode_strings(file_buf, min_length) - else: - # for reasonably sized files, we can read all the strings at once - if not quiet: - print("FLOSS static ASCII strings") - for s in strings.extract_ascii_strings(b, n=min_length): - print("%s" % (s.s)) - if not quiet: - print("") - - if not quiet: - print("FLOSS static UTF-16 strings") - for s in strings.extract_unicode_strings(b, n=min_length): - print("%s" % (s.s)) - if not quiet: - print("") + if not quiet: + print("FLOSS static ASCII strings") + for s in static_ascii_strings: + print("%s" % s.s) + if not quiet: + print("") + + if not quiet: + print("FLOSS static Unicode strings") + for s in static_unicode_strings: + print("%s" % s.s) + if not quiet: + print("") def print_stack_strings(extracted_strings, quiet=False, expert=False): @@ -1030,7 +1017,17 @@ def main(argv=None): if not is_workspace_file(sample_file_path): if not options.no_static_strings and not 
options.functions: floss_logger.info("Extracting static strings...") - print_static_strings(sample_file_path, min_length=min_length, quiet=options.quiet) + if os.path.getsize(sample_file_path) > sys.maxsize: + floss_logger.warning("File too large, strings listings may be truncated.") + floss_logger.warning("FLOSS cannot handle files larger than 4GB on 32bit systems.") + + file_buf = get_file_as_mmap(sample_file_path) + print_static_strings(file_buf, min_length=min_length, quiet=options.quiet) + static_ascii_strings = strings.extract_ascii_strings(file_buf, min_length) + static_unicode_strings = strings.extract_unicode_strings(file_buf, min_length) + static_strings = chain(static_ascii_strings, static_unicode_strings) + else: + static_strings = [] if options.no_decoded_strings and options.no_stack_strings and not options.should_show_metainfo: # we are done From 51e86b593ebdafa466ec28c55d810eb9967d8701 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Tue, 21 Apr 2020 11:29:40 -0500 Subject: [PATCH 04/11] added static strings to json_output - using simplejson to handle iterators and for performance --- floss/main.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/floss/main.py b/floss/main.py index 090d13d0e..aec940611 100644 --- a/floss/main.py +++ b/floss/main.py @@ -6,7 +6,6 @@ import os import sys import mmap -import json import string import logging from time import time @@ -15,6 +14,7 @@ import tabulate import viv_utils +import simplejson as json import version import strings @@ -851,7 +851,7 @@ def create_r2_script(sample_file_path, r2_script_file, decoded_strings, stack_st # TODO return, catch exception in main() -def create_json_output(options, sample_file_path, decoded_strings, stack_strings): +def create_json_output(options, sample_file_path, decoded_strings, stack_strings, static_strings): """ Create a report of the analysis performed by FLOSS :param options: parsed options @@ -859,15 +859,17 @@ def 
create_json_output(options, sample_file_path, decoded_strings, stack_strings :param decoded_strings: list of decoded strings ([DecodedString]) :param stack_strings: list of stack strings ([StackString]) """ - strings = {'stack_strings': [sanitize_string_for_printing(ss.s) for ss in stack_strings], - 'decoded_strings': [sanitize_string_for_printing(ds.s) for ds in decoded_strings]} + strings = {'stack_strings': stack_strings, + 'decoded_strings': decoded_strings, + 'static_strings': static_strings} metadata = {'file_path': sample_file_path, 'stack_strings': not options.no_stack_strings, 'decoded_strings': not options.no_decoded_strings, 'static_strings': not options.no_static_strings} + report = {'metadata': metadata, 'strings': strings} try: with open(options.json_file_path, 'w') as f: - json.dump({'metadata': metadata, 'strings': strings}, f) + json.dump(report, f, iterable_as_array=True) except Exception: raise @@ -1026,6 +1028,7 @@ def main(argv=None): static_ascii_strings = strings.extract_ascii_strings(file_buf, min_length) static_unicode_strings = strings.extract_unicode_strings(file_buf, min_length) static_strings = chain(static_ascii_strings, static_unicode_strings) + del file_buf else: static_strings = [] @@ -1114,7 +1117,10 @@ def main(argv=None): print("\nFinished execution after %f seconds" % (time1 - time0)) if options.json_output_file: - create_json_output(options, sample_file_path, decoded_strings, stack_strings) + create_json_output(options, sample_file_path, + decoded_strings=decoded_strings, + stack_strings=stack_strings, + static_strings=static_strings) floss_logger.info("Wrote JSON file to %s\n" % options.json_output_file) return 0 From addb7aa303129a0ef106c3fdfa8015e25f62642d Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Tue, 21 Apr 2020 11:39:17 -0500 Subject: [PATCH 05/11] added simplejson to reqs --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 428c62cf8..eda33366f 100644 --- a/setup.py +++ 
b/setup.py @@ -13,6 +13,7 @@ requirements = [ "q", "pyyaml", + "simplejson", "tabulate", "vivisect", "plugnplay", From d78c5228326227a45a80497cf41cde80fd097246 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Tue, 21 Apr 2020 11:39:34 -0500 Subject: [PATCH 06/11] variable typo --- floss/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/floss/main.py b/floss/main.py index aec940611..af6cd9514 100644 --- a/floss/main.py +++ b/floss/main.py @@ -868,7 +868,7 @@ def create_json_output(options, sample_file_path, decoded_strings, stack_strings 'static_strings': not options.no_static_strings} report = {'metadata': metadata, 'strings': strings} try: - with open(options.json_file_path, 'w') as f: + with open(options.json_output_file, 'w') as f: json.dump(report, f, iterable_as_array=True) except Exception: raise From fa1d7cfa11a9491320891474d6399d787ddb1522 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Tue, 21 Apr 2020 11:42:55 -0500 Subject: [PATCH 07/11] LocationType subclass str and Enum to fix JSON encoding --- floss/decoding_manager.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/floss/decoding_manager.py b/floss/decoding_manager.py index 6f0b9ecaa..94c1e1c95 100644 --- a/floss/decoding_manager.py +++ b/floss/decoding_manager.py @@ -21,10 +21,10 @@ DecodedString = namedtuple("DecodedString", ["va", "s", "decoded_at_va", "fva", "characteristics"]) -class LocationType(Enum): - STACK = 1 - GLOBAL = 2 - HEAP = 3 +class LocationType(str, Enum): + STACK = 'STACK' + GLOBAL = 'GLOBAL' + HEAP = 'HEAP' def is_import(emu, va): From 88cab25ca5691e2bb8577abd0c397c2e641ef0d3 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Tue, 21 Apr 2020 11:46:03 -0500 Subject: [PATCH 08/11] add analysis date to report --- floss/main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/floss/main.py b/floss/main.py index af6cd9514..c81f3a557 100644 --- a/floss/main.py +++ b/floss/main.py @@ -8,6 +8,7 @@ import mmap import string import 
logging +import datetime from time import time from itertools import chain from optparse import OptionParser, OptionGroup @@ -863,6 +864,7 @@ def create_json_output(options, sample_file_path, decoded_strings, stack_strings 'decoded_strings': decoded_strings, 'static_strings': static_strings} metadata = {'file_path': sample_file_path, + 'date': datetime.datetime.now().isoformat(), 'stack_strings': not options.no_stack_strings, 'decoded_strings': not options.no_decoded_strings, 'static_strings': not options.no_static_strings} From 39d1d47cdea924b2b6299a937846549a6ea39fbc Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Wed, 22 Apr 2020 09:35:51 -0500 Subject: [PATCH 09/11] missing param in create_json_output docstring --- floss/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/floss/main.py b/floss/main.py index c81f3a557..d42523521 100644 --- a/floss/main.py +++ b/floss/main.py @@ -859,6 +859,7 @@ def create_json_output(options, sample_file_path, decoded_strings, stack_strings :param sample_file_path: path of the sample analyzed :param decoded_strings: list of decoded strings ([DecodedString]) :param stack_strings: list of stack strings ([StackString]) + :param static_strings: iterable of static strings ([String]) """ strings = {'stack_strings': stack_strings, 'decoded_strings': decoded_strings, From 2521339d153a1d743f58d09c8467c9a4e0c09089 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Sat, 30 May 2020 08:01:21 -0500 Subject: [PATCH 10/11] added string sanitize iterator --- floss/main.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/floss/main.py b/floss/main.py index d42523521..97dbd82b7 100644 --- a/floss/main.py +++ b/floss/main.py @@ -80,6 +80,17 @@ def decode_strings(vw, decoding_functions_candidates, min_length, no_filter=Fals return decoded_strings +def sanitize_strings_iterator(str_coll): + """ + Iterate a collection and yield sanitized strings (uses sanitize_string_for_printing) + :param str_coll: collection 
of strings to be sanitized + :return: a sanitized string + """ + for s_obj in str_coll: + s = getattr(s_obj, 's', s_obj) # Use .s attribute from each namedtuple if possible + yield sanitize_string_for_printing(s) + + def sanitize_string_for_printing(s): """ Return sanitized string for printing. @@ -861,9 +872,9 @@ def create_json_output(options, sample_file_path, decoded_strings, stack_strings :param stack_strings: list of stack strings ([StackString]) :param static_strings: iterable of static strings ([String]) """ - strings = {'stack_strings': stack_strings, - 'decoded_strings': decoded_strings, - 'static_strings': static_strings} + strings = {'stack_strings': sanitize_strings_iterator(stack_strings), + 'decoded_strings': sanitize_strings_iterator(decoded_strings), + 'static_strings': sanitize_strings_iterator(static_strings)} metadata = {'file_path': sample_file_path, 'date': datetime.datetime.now().isoformat(), 'stack_strings': not options.no_stack_strings, From 09d015864721d921db3bc6f087dc306328dc9378 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Thu, 4 Jun 2020 11:00:21 -0500 Subject: [PATCH 11/11] Update usage.md --- doc/usage.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/usage.md b/doc/usage.md index fc2c35e2f..75236bbb7 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -49,6 +49,16 @@ Analogous, you can disable the extraction of obfuscated strings or stackstrings. floss.exe --no-stack-strings malware.bin +### Write output as JSON (`-o/--output-json`) + +Use the `-o` or `--output-json` option with the name of a file you want + the output to be written to. The resulting report will contain + all the same data that was written to `stdout` but structured + in JSON to make it easy to ingest by a script. + + floss.exe --output-json report.json malware.bin + + ### Quiet mode (`-q`) You can supress the formatting of FLOSS output by providing