From f401939358f0aa52f03f8c6f8044f3f70ca37cfd Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 15:05:10 +0100 Subject: [PATCH 01/49] Update CONTRIBUTING.md --- CONTRIBUTING.md | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 50adfb6..fad1f7b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,18 +2,16 @@ All contributions are welcomed, thank you for taking the time to contribute to this project! -## How can you contribute? +## What branch should you base your contribution? -* Report bugs -* Suggest features and ideas -* Pull requests with a solved GitHub issue and new feature -* Pull request with a new content. +As a general rule, base your contribution on the `develop` branch. ## Persistent Git Branches The following git branches permanent in the Slips repository: -- `main`: contains the stable version of the repository. All new features should be based on this branch. +- `main`: contains the stable version of the repository. +- `develop`: all new features should be based on this branch. ## Naming Git branches for Pull Requests @@ -21,15 +19,11 @@ To keep the Git history clean and facilitate the revision of contributions we ask all branches to follow concise namings. These are the branch-naming patterns to follow when contributing: -- name-bugfix-<>: pull request branch, contains one bugfix, -- name-docs-<>: pull request branch, contains documentation work, -- name-enhance-<>: pull request branch, contains one enhancement (not a new feature, but improvement nonetheless) -- name-feature-<>: pull request branch, contains a new feature, -- name-refactor-<>: pull request branch, contains code refactoring, - -## What branch should you base your contribution? - -As a general rule, base your contribution on the `main` branch. +- bugfix-<>: pull request branch, contains one bugfix, +- docs-<>: pull request branch, contains documentation work, +- enhance-<>: pull request branch, contains one enhancement (not a new feature, but improvement nonetheless) +- feature-<>: pull request branch, contains a new feature, +- refactor-<>: pull request branch, contains code refactoring, ## Creating a pull request @@ -41,3 +35,10 @@ Pull Requests: - If you have developed multiple features and/or bugfixes, create separate branches for each one of them, and request merges for each branch; - The cleaner you code/change/changeset is, the faster it will be merged. + +## How can you contribute? + +* Report bugs +* Suggest features and ideas +* Pull requests with a solved GitHub issue and new feature +* Pull request with a new content. From cc6aa83bcfeb661bfe24d40a18e7008a0e7abc4d Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:01:13 +0100 Subject: [PATCH 02/49] PEP8 W291 remove trailing whitespace --- netflowlabeler.py | 159 ++++++++++++++++++++++------------------------ 1 file changed, 77 insertions(+), 82 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index fb0bc42..86ea334 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -40,26 +40,26 @@ class labeler(): """ conditionsGroup = [] """ - conditionsGroup = [ - {'Background': [ - [ {'srcIP': 'all'} ] - ] }, - {'Normal': [ + conditionsGroup = [ + {'Background': [ + [ {'srcIP': 'all'} ] + ] }, + {'Normal': [ [ {'Proto':'IGMP'} ], [ {'Proto':'ARP'} ] - ] }, + ] }, {'Botnet-CC': [ - [ {'srcIP': '10.0.0.151'} ], + [ {'srcIP': '10.0.0.151'} ], [ {'dstIP': '10.0.0.151'} ] - ] }, + ] }, {'Botnet-SPAM': [ - [ {'Proto': 'TCP'}, {'srcPort': '25'} ], + [ {'Proto': 'TCP'}, {'srcPort': '25'} ], [ {'Proto': 'TCP'}, {'dstPort': '25'} ] - ] }, - {'Botnet-DGA': [ + ] }, + {'Botnet-DGA': [ [ {'Proto':'UDP'}, {'srcPort':'53'} ] , - [ {'Proto':'UDP'}, {'dstPort':'53'} ] - ] } + [ {'Proto':'UDP'}, {'dstPort':'53'} ] + ] } ] """ @@ -91,9 +91,9 @@ def getLabel(self, column_values): # Default to empty genericlabel and detailedlabel labelToReturn= ( "(empty)", "(empty)") - # Process all the conditions + # Process all the conditions for group in self.conditionsGroup: - # The first key of the group is the label to put + # The first key of the group is the label to put labelline = list(group.keys())[0] genericlabelToVerify = labelline.split(',')[0].strip() try: @@ -124,7 +124,7 @@ def getLabel(self, column_values): condColumn = list(acond.keys())[0] condValue = acond[condColumn].lower() condColumn = condColumn.lower() - + if condColumn.find('!') != -1: # Negation condition condColumn = condColumn.replace('!','') @@ -274,7 +274,7 @@ def getLabel(self, column_values): labelToReturn = (genericlabelToVerify, detailedlabelToVerify) if args.debug > 0: print('\tNew label assigned: {0}'.format(genericlabelToVerify, detailedlabelToVerify)) - + if args.verbose > 0: if 'Background' in labelToReturn: print(f'\tFinal label assigned: {labelToReturn}') @@ -397,7 +397,7 @@ def process_nfdump(f, headers, labelmachine): # Create the output file with the header outputfile = open(args.netflowFile+'.labeled','w+') - + # Write the column names columnnames = "Date flow start Duration Proto Src IP Addr:Port Dst IP Addr:Port Flags Tos Packets Bytes Flows Label\n" outputfile.writelines(columnnames) @@ -420,33 +420,32 @@ def process_nfdump(f, headers, labelmachine): date = columnValues[0] # Store the value in the dict dict = netflowArray[0] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = date netflowArray[0] = dict hour = columnValues[1] # Store the value in the dict dict = netflowArray[1] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = hour netflowArray[1] = dict duration = columnValues[2] # Store the value in the dict dict = netflowArray[2] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = duration netflowArray[2] = dict protocol = columnValues[3].upper() # Store the value in the dict dict = netflowArray[3] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] #columnName = 'Proto' dict[columnName] = protocol netflowArray[3] = dict - if 'TCP' in protocol or 'UDP' in protocol or 'RTP' in protocol: temp = columnValues[4] if len(temp.split(':')) <= 2: @@ -454,14 +453,14 @@ def process_nfdump(f, headers, labelmachine): srcip = temp.split(':')[0] # Store the value in the dict dict = netflowArray[4] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = srcip netflowArray[4] = dict srcport = temp.split(':')[1] # Store the value in the dict dict = netflowArray[5] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = srcport netflowArray[5] = dict @@ -469,14 +468,14 @@ def process_nfdump(f, headers, labelmachine): dstip = temp2.split(':')[0] # Store the value in the dict dict = netflowArray[6] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = dstip netflowArray[6] = dict dstport = temp2.split(':')[1] # Store the value in the dict dict = netflowArray[7] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = dstport netflowArray[7] = dict elif len(temp.split(':')) > 2: @@ -484,14 +483,14 @@ def process_nfdump(f, headers, labelmachine): srcip = temp[0:temp.rfind(':')] # Store the value in the dict dict = netflowArray[4] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = srcip netflowArray[4] = dict srcport = temp[temp.rfind(':')+1:] # Store the value in the dict dict = netflowArray[5] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = srcport netflowArray[5] = dict @@ -499,18 +498,18 @@ def process_nfdump(f, headers, labelmachine): dstip = temp2[0:temp2.rfind(':')] # Store the value in the dict dict = netflowArray[6] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = dstip netflowArray[6] = dict dstport = temp2[temp2.rfind(':')+1:] # Store the value in the dict dict = netflowArray[7] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = dstport netflowArray[7] = dict else: - print() + print() print('Please implement this protocol!') print(line) sys.exit(-1) @@ -518,69 +517,69 @@ def process_nfdump(f, headers, labelmachine): srcip = temp = columnValues[4] # Store the value in the dict dict = netflowArray[4] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = srcip netflowArray[4] = dict srcport = '0' # Store the value in the dict dict = netflowArray[5] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = srcport netflowArray[5] = dict dstip = temp = columnValues[5] # Store the value in the dict dict = netflowArray[6] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = dstip netflowArray[6] = dict dstport = '0' # Store the value in the dict dict = netflowArray[7] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = dstport netflowArray[7] = dict flags = columnValues[6].upper() # Store the value in the dict dict = netflowArray[8] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = flags netflowArray[8] = dict tos = columnValues[7] # Store the value in the dict dict = netflowArray[9] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = tos netflowArray[9] = dict packets = columnValues[8] # Store the value in the dict dict = netflowArray[10] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = packets netflowArray[10] = dict bytes = columnValues[9] # Store the value in the dict dict = netflowArray[11] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = bytes netflowArray[11] = dict flows = columnValues[10] # Store the value in the dict dict = netflowArray[12] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = flows netflowArray[12] = dict # Empty the label in the dict dict = netflowArray[13] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = "" netflowArray[13] = dict @@ -593,7 +592,7 @@ def process_nfdump(f, headers, labelmachine): genericlabel, detailedlabel = labelmachine.getLabel(netflowArray) # Store the value in the dict dict = netflowArray[13] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = genericlabel netflowArray[13] = dict @@ -775,8 +774,8 @@ def define_type(data): """ try: # If line json, it can be Zeek or suricata - # If line CSV, it can be Argus - # If line TSV, it can be Argus or zeek + # If line CSV, it can be Argus + # If line TSV, it can be Argus or zeek input_type = 'unknown' @@ -968,7 +967,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): columnDict['start'] = "" netflowArray.append(columnDict) columnDict = {} - + columnDict['Duration'] = "" netflowArray.append(columnDict) columnDict = {} @@ -1038,74 +1037,74 @@ def process_argus(column_idx, output_file, labelmachine, filetype): date = columnValues[0] # Store the value in the dict dict = netflowArray[0] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = date netflowArray[0] = dict hour = columnValues[1] # Store the value in the dict dict = netflowArray[1] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = hour netflowArray[1] = dict duration = columnValues[2] # Store the value in the dict dict = netflowArray[2] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = duration netflowArray[2] = dict protocol = columnValues[3].upper() # Store the value in the dict dict = netflowArray[3] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = protocol netflowArray[3] = dict srcIP = columnValues[4] # Store the value in the dict dict = netflowArray[4] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = srcIP netflowArray[4] = dict if 'ARP' in protocol: - srcPort = '0' + srcPort = '0' # Store the value in the dict dict = netflowArray[5] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = srcPort netflowArray[5] = dict else: srcPort = columnValues[5] # Store the value in the dict dict = netflowArray[5] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = srcPort netflowArray[5] = dict - dstIP = columnValues[6] + dstIP = columnValues[6] # Store the value in the dict dict = netflowArray[6] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = dstIP netflowArray[6] = dict if 'ARP' in protocol: - dstPort = '0' + dstPort = '0' # Store the value in the dict dict = netflowArray[7] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = dstPort netflowArray[7] = dict Flags = columnValues[8] # Store the value in the dict dict = netflowArray[8] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = Flags netflowArray[8] = dict @@ -1113,38 +1112,36 @@ def process_argus(column_idx, output_file, labelmachine, filetype): dstPort = columnValues[7] # Store the value in the dict dict = netflowArray[7] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = dstPort netflowArray[7] = dict Flags = columnValues[8] # Store the value in the dict dict = netflowArray[8] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = Flags netflowArray[8] = dict - - if 'LLC' in protocol: Tos = '0' # Store the value in the dict dict = netflowArray[9] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = Tos netflowArray[9] = dict Packets = columnValues[9] # Store the value in the dict dict = netflowArray[10] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = Packets netflowArray[10] = dict Bytes = columnValues[10] # Store the value in the dict dict = netflowArray[11] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = Bytes netflowArray[11] = dict @@ -1152,28 +1149,28 @@ def process_argus(column_idx, output_file, labelmachine, filetype): genericlabellabel, detailedlabel = labelmachine.getLabel(netflowArray) # Store the value in the dict dict = netflowArray[12] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = genericlabellabel netflowArray[12] = dict elif 'ARP' in protocol: Tos = '0' # Store the value in the dict dict = netflowArray[9] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = Tos netflowArray[9] = dict Packets = columnValues[8] # Store the value in the dict dict = netflowArray[10] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = Packets netflowArray[10] = dict Bytes = columnValues[9] # Store the value in the dict dict = netflowArray[11] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = Bytes netflowArray[11] = dict @@ -1181,28 +1178,28 @@ def process_argus(column_idx, output_file, labelmachine, filetype): genericlabellabel, detailedlabel = labelmachine.getLabel(netflowArray) # Store the value in the dict dict = netflowArray[12] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = genericlabellabel netflowArray[12] = dict else: Tos = columnValues[9] # Store the value in the dict dict = netflowArray[9] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = Tos netflowArray[9] = dict Packets = columnValues[10] # Store the value in the dict dict = netflowArray[10] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = Packets netflowArray[10] = dict Bytes = columnValues[11] # Store the value in the dict dict = netflowArray[11] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = Bytes netflowArray[11] = dict @@ -1210,7 +1207,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): genericlabellabel, detailedlabel = labelmachine.getLabel(netflowArray) # Store the value in the dict dict = netflowArray[12] - columnName = list(dict.keys())[0] + columnName = list(dict.keys())[0] dict[columnName] = genericlabellabel netflowArray[12] = dict @@ -1250,7 +1247,7 @@ def process_netflow(labelmachine): # ---- Define the type of file headerline = input_file.readline() - # If there are no headers, get out. Most start with '#' but Argus starts with 'StartTime' and nfdump with 'Date' + # If there are no headers, get out. Most start with '#' but Argus starts with 'StartTime' and nfdump with 'Date' if '#' not in headerline[0] and 'Date' not in headerline and 'StartTime' not in headerline and 'ts' not in headerline and 'timestamp' not in headerline: print('The file has not headers. Please add them.') sys.exit(-1) @@ -1265,7 +1262,7 @@ def process_netflow(labelmachine): # Store the headers in the output file output_netflow_line_to_file(output_file, headerline) - # ---- Define the columns + # ---- Define the columns if filetype == 'zeek-json': column_idx = define_columns(headerline, filetype='json') amount_lines_processed = 0 @@ -1299,7 +1296,7 @@ def process_netflow(labelmachine): elif filetype == 'nfdump-tab': column_idx = define_columns(headerline, filetype='tab') amount_lines_processed = process_nfdump(column_idx, input_file, output_file, headerline, labelmachine) - + # Close the outputfile output_file.close() @@ -1330,7 +1327,7 @@ def loadConditions(labelmachine): # exit(1) if args.debug > 0: - print('Loading the conditions from the configuration file ') + print('Loading the conditions from the configuration file ') # Read the conf file line = conf.readline().strip() @@ -1368,7 +1365,7 @@ def loadConditions(labelmachine): line = conf.readline().strip() else: break - labelmachine.addCondition(conditions) + labelmachine.addCondition(conditions) conditions = {} except KeyboardInterrupt: @@ -1409,5 +1406,3 @@ def loadConditions(labelmachine): # CTRL-C pretty handling. print("Keyboard Interruption!. Exiting.") sys.exit(1) - - From 879038fb4e3cc4e5358b3d974da1f7d91d3d4b8b Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:01:55 +0100 Subject: [PATCH 03/49] Add gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1377554 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.swp From 70b0345432560c420bb339c0751eed7aca61c097 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:09:02 +0100 Subject: [PATCH 04/49] PEP8 E231 missing whitespace after ',' --- netflowlabeler.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 86ea334..fbc687a 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -63,7 +63,7 @@ class labeler(): ] """ - def addCondition(self,condition): + def addCondition(self, condition): """ Add a condition. Input: condition is a string? @@ -127,7 +127,7 @@ def getLabel(self, column_values): if condColumn.find('!') != -1: # Negation condition - condColumn = condColumn.replace('!','') + condColumn = condColumn.replace('!', '') netflowValue = column_values[condColumn] if args.debug > 0: print(f'\t\tTo compare field: {condColumn}, Condition value: {condValue}, Netflow value: {netflowValue}') @@ -343,8 +343,8 @@ def process_nfdump(f, headers, labelmachine): columnDict = {} # Replace the TABs for spaces, if it has them..., and replace the : in the ports to spaces also, and strip the \n, and the word flow - temp2 = headers.replace('flow','') - temp = re.sub( '\s+', ' ', temp2 ).replace(':',' ').strip() + temp2 = headers.replace('flow', '') + temp = re.sub( '\s+', ' ', temp2 ).replace(':', ' ').strip() columnNames = temp.split(' ') # Only to separate src ip from dst ip @@ -396,7 +396,7 @@ def process_nfdump(f, headers, labelmachine): #print netflowArray # Create the output file with the header - outputfile = open(args.netflowFile+'.labeled','w+') + outputfile = open(args.netflowFile+'.labeled', 'w+') # Write the column names columnnames = "Date flow start Duration Proto Src IP Addr:Port Dst IP Addr:Port Flags Tos Packets Bytes Flows Label\n" @@ -412,7 +412,7 @@ def process_nfdump(f, headers, labelmachine): # Parse the columns # Strip and replace ugly stuff - temp2 = line.replace('->','') + temp2 = line.replace('->', '') temp = re.sub( '\s+', ' ', temp2 ).strip() columnValues = temp.split(' ') @@ -1026,7 +1026,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): # Parse the columns # Strip and replace ugly stuff - temp2 = line.replace('->','') + temp2 = line.replace('->', '') temp = re.sub( '\s+', ' ', temp2 ).strip() columnValues = temp.split(' ') @@ -1236,7 +1236,7 @@ def process_netflow(labelmachine): # Open flows file try: - input_file = open(args.netflowFile,'r') + input_file = open(args.netflowFile, 'r') except Exception as inst: print('Some problem opening the input netflow file. In process_netflow()') print(type(inst)) # the exception instance @@ -1257,7 +1257,7 @@ def process_netflow(labelmachine): print(f'[+] Type of flow file to label: {filetype}') # Create the output file for all cases - output_file = open(args.netflowFile+'.labeled','w+') + output_file = open(args.netflowFile+'.labeled', 'w+') # Store the headers in the output file output_netflow_line_to_file(output_file, headerline) @@ -1386,10 +1386,10 @@ def loadConditions(labelmachine): # Parse the parameters parser = argparse.ArgumentParser(description="Tool to label netflow files", add_help=False) - parser.add_argument('-c','--configFile', metavar='', action='store', required=True, help='path to labeling rules configuration.') - parser.add_argument('-v', '--verbose',metavar='',action='store', required=False, type=int, default=0, help='amount of verbosity. This shows more info about the results.') + parser.add_argument('-c', '--configFile', metavar='', action='store', required=True, help='path to labeling rules configuration.') + parser.add_argument('-v', '--verbose', metavar='', action='store', required=False, type=int, default=0, help='amount of verbosity. This shows more info about the results.') parser.add_argument('-d', '--debug', action='store', required=False, type=int, default=0, help='amount of debugging. This shows inner information about the program.') - parser.add_argument('-f', '--netflowFile',metavar='', action='store', required=True, help='file to label.') + parser.add_argument('-f', '--netflowFile', metavar='', action='store', required=True, help='file to label.') parser.add_argument("-h", "--help", action="help", help="command line help") args = parser.parse_args() From 5d2e28d0ca1df9dc33458b1dfbfb1335ee735c8e Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:10:51 +0100 Subject: [PATCH 05/49] PEP8 E201 whitespace after '(' --- netflowlabeler.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index fbc687a..7136ccb 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -89,7 +89,7 @@ def getLabel(self, column_values): """ try: # Default to empty genericlabel and detailedlabel - labelToReturn= ( "(empty)", "(empty)") + labelToReturn= ("(empty)", "(empty)") # Process all the conditions for group in self.conditionsGroup: @@ -344,7 +344,7 @@ def process_nfdump(f, headers, labelmachine): # Replace the TABs for spaces, if it has them..., and replace the : in the ports to spaces also, and strip the \n, and the word flow temp2 = headers.replace('flow', '') - temp = re.sub( '\s+', ' ', temp2 ).replace(':', ' ').strip() + temp = re.sub('\s+', ' ', temp2 ).replace(':', ' ').strip() columnNames = temp.split(' ') # Only to separate src ip from dst ip @@ -413,7 +413,7 @@ def process_nfdump(f, headers, labelmachine): # Parse the columns # Strip and replace ugly stuff temp2 = line.replace('->', '') - temp = re.sub( '\s+', ' ', temp2 ).strip() + temp = re.sub('\s+', ' ', temp2 ).strip() columnValues = temp.split(' ') # Date @@ -953,7 +953,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): columnDict = {} # Replace the TABs for spaces, if it has them..., and replace the : in the ports to spaces also, and strip the \n, and the word flow - temp = re.sub( '\s+', ' ', headers ).strip() + temp = re.sub('\s+', ' ', headers ).strip() columnNames = temp.split(' ') #if args.debug > 0: @@ -1027,7 +1027,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): # Parse the columns # Strip and replace ugly stuff temp2 = line.replace('->', '') - temp = re.sub( '\s+', ' ', temp2 ).strip() + temp = re.sub('\s+', ' ', temp2 ).strip() columnValues = temp.split(' ') #if args.debug > 0: From e4347c500dc7826a1fc2a2c81de891baae2b196d Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:13:37 +0100 Subject: [PATCH 06/49] PEP8 E265 block comment should start with '# ' --- netflowlabeler.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 7136ccb..755cf0f 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -350,7 +350,7 @@ def process_nfdump(f, headers, labelmachine): # Only to separate src ip from dst ip addressType = '' - #if args.debug > 0: + # if args.debug > 0: # print 'Columns names: {0}'.format(columnNames) for cN in columnNames: @@ -391,9 +391,9 @@ def process_nfdump(f, headers, labelmachine): netflowArray.append(columnDict) columnDict = {} - #if args.debug > 0: - #print 'netflowArray' - #print netflowArray + # if args.debug > 0: + # print 'netflowArray' + # print netflowArray # Create the output file with the header outputfile = open(args.netflowFile+'.labeled', 'w+') @@ -442,7 +442,7 @@ def process_nfdump(f, headers, labelmachine): # Store the value in the dict dict = netflowArray[3] columnName = list(dict.keys())[0] - #columnName = 'Proto' + # columnName = 'Proto' dict[columnName] = protocol netflowArray[3] = dict @@ -583,7 +583,7 @@ def process_nfdump(f, headers, labelmachine): dict[columnName] = "" netflowArray[13] = dict - #if args.debug > 0: + # if args.debug > 0: # print date,hour,duration,protocol, srcip, srcport, dstip, dstport, flags, tos, packets, bytes, flows # print netflowArray @@ -596,8 +596,8 @@ def process_nfdump(f, headers, labelmachine): dict[columnName] = genericlabel netflowArray[13] = dict - #if args.debug > 0: - #print netflowArray + # if args.debug > 0: + # print netflowArray # Ask to store the netflow output_netflow_line_to_file(outputfile, netflowArray) @@ -880,7 +880,7 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): else: dbytes = int(column_values['dbytes']) column_values['bytes'] = str(sbytes + dbytes) - #print(f'New column bytes = {column_values["bytes"]}') + # print(f'New column bytes = {column_values["bytes"]}') # Sum packets # We do it like this because sometimes the column can be - or 0 @@ -893,7 +893,7 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): else: dpkts = int(column_values['dpkts']) column_values['pkts'] = str(spkts + dpkts) - #print(f'New column pkst = {column_values["pkts"]}') + # print(f'New column pkst = {column_values["pkts"]}') # Sum ip_bytes # We do it like this because sometimes the column can be - or 0 @@ -906,7 +906,7 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): else: dip_bytes = int(column_values['resp_ip_bytes']) column_values['ipbytes'] = str(sip_bytes + dip_bytes) - #print(f'New column ipbytes = {column_values["ipbytes"]}') + # print(f'New column ipbytes = {column_values["ipbytes"]}') # Request a label genericlabel, detailedlabel = labelmachine.getLabel(column_values) @@ -956,7 +956,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): temp = re.sub('\s+', ' ', headers ).strip() columnNames = temp.split(' ') - #if args.debug > 0: + # if args.debug > 0: # print 'Columns names: {0}'.format(columnNames) # So far argus does no have a column Date @@ -1030,7 +1030,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): temp = re.sub('\s+', ' ', temp2 ).strip() columnValues = temp.split(' ') - #if args.debug > 0: + # if args.debug > 0: # print columnValues # Date @@ -1211,7 +1211,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): dict[columnName] = genericlabellabel netflowArray[12] = dict - #if args.debug > 0: + # if args.debug > 0: # print netflowArray # Ask to store the netflow @@ -1318,11 +1318,11 @@ def loadConditions(labelmachine): conditionsList = [] try: conf = open(args.configFile) - #try: + # try: # if args.verbose > 0: - ## print('Opening the configuration file \'{0}\''.format(args.configFile)) + # print('Opening the configuration file \'{0}\''.format(args.configFile)) # conf = open(args.configFile) - #except: + # except: # print('The file \'{0}\' couldn\'t be opened.'.format(args.configFile)) # exit(1) @@ -1342,7 +1342,7 @@ def loadConditions(labelmachine): # Read a label if line.strip()[0] != '-': label = line.split(':')[0] - #if args.debug > 0: + # if args.debug > 0: # print 'Label: {}'.format(label) conditions[label]=[] From d46e3adf683d0e3462de09592545ddbb90211570 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:14:52 +0100 Subject: [PATCH 07/49] PEP8 E203 whitespace before ':' --- netflowlabeler.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 755cf0f..a5962e3 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -132,7 +132,7 @@ def getLabel(self, column_values): if args.debug > 0: print(f'\t\tTo compare field: {condColumn}, Condition value: {condValue}, Netflow value: {netflowValue}') - if (condValue != netflowValue) or (condValue == 'all') : + if (condValue != netflowValue) or (condValue == 'all'): allTrue = True if args.debug > 0: print('\t\t\tTrue (negative)') @@ -155,7 +155,7 @@ def getLabel(self, column_values): if args.debug > 0: print(f'\t\tTo compare field: {condColumn}, Condition value: {condValue}, Netflow value: {netflowValue}') # Pay attention to directionality of condition 'condValue < flowvalue' - if (int(condValue) < int(netflowValue)) or (condValue == 'all') : + if (int(condValue) < int(netflowValue)) or (condValue == 'all'): allTrue = True if args.debug > 0: print('\t\t\tTrue') @@ -171,7 +171,7 @@ def getLabel(self, column_values): if args.debug > 0: print(f'\t\tTo compare field: {condColumn}, Condition value: {condValue}, Netflow value: {netflowValue}') # Pay attention to directionality of condition 'condValue > flowvalue' - if (int(condValue) > int(netflowValue)) or (condValue == 'all') : + if (int(condValue) > int(netflowValue)) or (condValue == 'all'): allTrue = True if args.debug > 0: print('\t\t\tTrue') @@ -187,7 +187,7 @@ def getLabel(self, column_values): if args.debug > 0: print(f'\t\tTo compare field: {condColumn}, Condition value: {condValue}, Netflow value: {netflowValue}') # Pay attention to directionality of condition 'condValue >= flowvalue' - if (int(condValue) >= int(netflowValue)) or (condValue == 'all') : + if (int(condValue) >= int(netflowValue)) or (condValue == 'all'): allTrue = True if args.debug > 0: print('\t\t\tTrue') @@ -203,7 +203,7 @@ def getLabel(self, column_values): if args.debug > 0: print(f'\t\tTo compare field: {condColumn}, Condition value: {condValue}, Netflow value: {netflowValue}') # Pay attention to directionality of condition 'condValue <= flowvalue' - if (int(condValue) <= int(netflowValue)) or (condValue == 'all') : + if (int(condValue) <= int(netflowValue)) or (condValue == 'all'): allTrue = True if args.debug > 0: print('\t\t\tTrue') @@ -217,7 +217,7 @@ def getLabel(self, column_values): netflowValue = column_values[condColumn] if args.debug > 0: print(f'\t\tTo compare field: {condColumn}, Condition value: {condValue}, Netflow value: {netflowValue}') - if (int(condValue) == int(netflowValue)) or (condValue == 'all') : + if (int(condValue) == int(netflowValue)) or (condValue == 'all'): allTrue = True if args.debug > 0: print('\t\t\tTrue') @@ -253,7 +253,7 @@ def getLabel(self, column_values): else: # It is not a colum that we can treat as a number netflowValue = column_values[condColumn] - if (condValue == netflowValue) or (condValue == 'all') : + if (condValue == netflowValue) or (condValue == 'all'): netflowValue = column_values[condColumn] if args.debug > 0: print(f'\t\tTo compare field: {condColumn}, Condition value: {condValue}, Netflow value: {netflowValue}') From af9f44aecc2cdebbdf668fa79dedfba88269df4a Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:16:04 +0100 Subject: [PATCH 08/49] PEP8 E271 multiple spaces after keyword --- netflowlabeler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index a5962e3..42d785f 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -146,7 +146,7 @@ def getLabel(self, column_values): # Normal condition, no negation # Is the column a number? - if ('bytes' in condColumn) or ('packets' in condColumn) or ('srcport' in condColumn) or ('dstport' in condColumn) or ('sbytes' in condColumn) or ('dbyets' in condColumn) or ('spkts' in condColumn) or ('dpkts' in condColumn) or ('ip_orig_bytes' in condColumn) or ('ip_resp_bytes' in condColumn): + if ('bytes' in condColumn) or ('packets' in condColumn) or ('srcport' in condColumn) or ('dstport' in condColumn) or ('sbytes' in condColumn) or ('dbyets' in condColumn) or ('spkts' in condColumn) or ('dpkts' in condColumn) or ('ip_orig_bytes' in condColumn) or ('ip_resp_bytes' in condColumn): # It is a colum that we can treat as a number # Find if there is <, > or = in the condition if '>' in condColumn[-1]: From 9fc3dc385a75ffe8ad2a6751b05204034c85b4ae Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:17:07 +0100 Subject: [PATCH 09/49] PEP8 E272 multiple spaces before keyword --- netflowlabeler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 42d785f..3d31c1b 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -165,7 +165,7 @@ def getLabel(self, column_values): print('\t\t\tFalse') allTrue = False break - elif '<' in condColumn[-1]: + elif '<' in condColumn[-1]: condColumn = condColumn[:-1] netflowValue = column_values[condColumn] if args.debug > 0: @@ -181,7 +181,7 @@ def getLabel(self, column_values): print('\t\t\tFalse') allTrue = False break - elif '<=' in condColumn[-2]: + elif '<=' in condColumn[-2]: condColumn = condColumn[:-2] netflowValue = column_values[condColumn] if args.debug > 0: @@ -197,7 +197,7 @@ def getLabel(self, column_values): print('\t\t\tFalse') allTrue = False break - elif '>=' in condColumn[-2]: + elif '>=' in condColumn[-2]: condColumn = condColumn[:-2] netflowValue = column_values[condColumn] if args.debug > 0: From 123a479bc550081a86d5d0a1b064e237beaf11fe Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:18:51 +0100 Subject: [PATCH 10/49] PEP8 E116 unexpected indentation (comment) --- netflowlabeler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 3d31c1b..f9476b7 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -392,8 +392,8 @@ def process_nfdump(f, headers, labelmachine): columnDict = {} # if args.debug > 0: - # print 'netflowArray' - # print netflowArray + # print 'netflowArray' + # print netflowArray # Create the output file with the header outputfile = open(args.netflowFile+'.labeled', 'w+') @@ -597,7 +597,7 @@ def process_nfdump(f, headers, labelmachine): netflowArray[13] = dict # if args.debug > 0: - # print netflowArray + # print netflowArray # Ask to store the netflow output_netflow_line_to_file(outputfile, netflowArray) From 4c06a8503f87362a99b369ff22fe95304b904bb2 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:21:05 +0100 Subject: [PATCH 11/49] PEP8 E202 whitespace before ')' --- netflowlabeler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index f9476b7..3d1068b 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -344,7 +344,7 @@ def process_nfdump(f, headers, labelmachine): # Replace the TABs for spaces, if it has them..., and replace the : in the ports to spaces also, and strip the \n, and the word flow temp2 = headers.replace('flow', '') - temp = re.sub('\s+', ' ', temp2 ).replace(':', ' ').strip() + temp = re.sub('\s+', ' ', temp2).replace(':', ' ').strip() columnNames = temp.split(' ') # Only to separate src ip from dst ip @@ -413,7 +413,7 @@ def process_nfdump(f, headers, labelmachine): # Parse the columns # Strip and replace ugly stuff temp2 = line.replace('->', '') - temp = re.sub('\s+', ' ', temp2 ).strip() + temp = re.sub('\s+', ' ', temp2).strip() columnValues = temp.split(' ') # Date @@ -953,7 +953,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): columnDict = {} # Replace the TABs for spaces, if it has them..., and replace the : in the ports to spaces also, and strip the \n, and the word flow - temp = re.sub('\s+', ' ', headers ).strip() + temp = re.sub('\s+', ' ', headers).strip() columnNames = temp.split(' ') # if args.debug > 0: @@ -1027,7 +1027,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): # Parse the columns # Strip and replace ugly stuff temp2 = line.replace('->', '') - temp = re.sub('\s+', ' ', temp2 ).strip() + temp = re.sub('\s+', ' ', temp2).strip() columnValues = temp.split(' ') # if args.debug > 0: From 1aebdb425d887714c0bdad9cbb1d316fce4d74f7 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:23:02 +0100 Subject: [PATCH 12/49] PEP8 E303 too many blank lines --- netflowlabeler.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 3d1068b..a95ec55 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -81,7 +81,6 @@ def addCondition(self, condition): print(inst) # __str__ allows args to printed directly exit(-1) - def getLabel(self, column_values): """ Get the values of the columns of a netflow line and return a label @@ -402,7 +401,6 @@ def process_nfdump(f, headers, labelmachine): columnnames = "Date flow start Duration Proto Src IP Addr:Port Dst IP Addr:Port Flags Tos Packets Bytes Flows Label\n" outputfile.writelines(columnnames) - # Read the second line to start processing line = f.readline() amount_lines_processed += 1 @@ -587,9 +585,9 @@ def process_nfdump(f, headers, labelmachine): # print date,hour,duration,protocol, srcip, srcport, dstip, dstport, flags, tos, packets, bytes, flows # print netflowArray - # Request a label genericlabel, detailedlabel = labelmachine.getLabel(netflowArray) + # Store the value in the dict dict = netflowArray[13] columnName = list(dict.keys())[0] @@ -602,7 +600,6 @@ def process_nfdump(f, headers, labelmachine): # Ask to store the netflow output_netflow_line_to_file(outputfile, netflowArray) - line = f.readline() amount_lines_processed += 1 @@ -925,8 +922,6 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): amount_lines_processed += 1 pass - - return amount_lines_processed except Exception as inst: exception_line = sys.exc_info()[2].tb_lineno @@ -1016,7 +1011,6 @@ def process_argus(column_idx, output_file, labelmachine, filetype): columnnames = "Date Time Dur Proto SrcAddr Sport Dir DstAddr Dport State sTos TotPkts TotBytes Label\n" output_file.writelines(columnnames) - # Read the second line to start processing line = f.readline() amount_lines_processed += 1 @@ -1084,7 +1078,6 @@ def process_argus(column_idx, output_file, labelmachine, filetype): dict[columnName] = srcPort netflowArray[5] = dict - dstIP = columnValues[6] # Store the value in the dict dict = netflowArray[6] @@ -1092,7 +1085,6 @@ def process_argus(column_idx, output_file, labelmachine, filetype): dict[columnName] = dstIP netflowArray[6] = dict - if 'ARP' in protocol: dstPort = '0' # Store the value in the dict From f615be197dcdc13f3aeeea74c7df7e798cf38fb2 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:23:56 +0100 Subject: [PATCH 13/49] PEP8 E302 expected 2 blank lines, found 1 --- netflowlabeler.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/netflowlabeler.py b/netflowlabeler.py index a95ec55..6a52321 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -288,6 +288,7 @@ def getLabel(self, column_values): print(inst) # __str__ allows args to printed directly exit(-1) + def output_netflow_line_to_file(outputfile, originalline, filetype='', genericlabel='', detailedlabel=''): """ Get data and store it on a new file @@ -824,6 +825,7 @@ def define_type(data): print(str(inst), 0, 1) sys.exit(1) + def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): """ Process a Zeek file @@ -930,6 +932,7 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): print(str(inst), 0, 1) sys.exit(1) + def process_argus(column_idx, output_file, labelmachine, filetype): """ DEPRECATED!! NEEDS UPDATE COMPLETELY @@ -1218,6 +1221,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): print(str(inst), 0, 1) sys.exit(1) + def process_netflow(labelmachine): """ This function takes the flowFile and parse it. Then it ask for a label and finally it calls a function to store the netflow in a file From 6883994fd795b56741bfdb78055ac14456de15d1 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:26:32 +0100 Subject: [PATCH 14/49] PEP8 E111 indentation is not a multiple of 4 --- netflowlabeler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 6a52321..82d70cd 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -118,7 +118,7 @@ def getLabel(self, column_values): allTrue = True for acond in andcondition: if args.debug > 0: - print('\t\t\tAnd this with : {0}'.format(acond)) + print('\t\t\tAnd this with : {0}'.format(acond)) condColumn = list(acond.keys())[0] condValue = acond[condColumn].lower() From a92e02f3126f6fac4f75ca4050e3b85a5b7b2cbe Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:27:32 +0100 Subject: [PATCH 15/49] PEP8 E225 missing whitespace around operator --- netflowlabeler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 82d70cd..6a2082b 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -88,7 +88,7 @@ def getLabel(self, column_values): """ try: # Default to empty genericlabel and detailedlabel - labelToReturn= ("(empty)", "(empty)") + labelToReturn = ("(empty)", "(empty)") # Process all the conditions for group in self.conditionsGroup: @@ -1340,7 +1340,7 @@ def loadConditions(labelmachine): label = line.split(':')[0] # if args.debug > 0: # print 'Label: {}'.format(label) - conditions[label]=[] + conditions[label] = [] # Now read all the conditions for this label line = conf.readline().strip() From 606cf96dfeaa04d9fa604f047d82d973866a3f65 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:28:00 +0100 Subject: [PATCH 16/49] PEP8 E305 expected 2 blank lines after class or function definition, found 1 --- netflowlabeler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/netflowlabeler.py b/netflowlabeler.py index 6a2082b..d62ed16 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1376,6 +1376,7 @@ def loadConditions(labelmachine): sys.exit(-1) return False + if __name__ == '__main__': print('NetFlow labeler. Version {}'.format(version)) print('https://stratosphereips.org') From 8f6cf9b25d6c460086cf08738bead7d050a3f05c Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:29:14 +0100 Subject: [PATCH 17/49] PEP8 E712 comparison to False should be 'if cond is False:' or 'if not cond:' --- netflowlabeler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index d62ed16..3928dda 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -749,7 +749,7 @@ def define_columns(headerline, filetype): # We need a temp dict because we can not change the size of dict while analyzing it temp_dict = {} for i in column_idx: - if type(column_idx[i]) == bool and column_idx[i] == False: + if type(column_idx[i]) == bool and column_idx[i] is False: continue temp_dict[i] = column_idx[i] column_idx = temp_dict From 8979c99efcf03e69945456492aabd76505487cb4 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:29:56 +0100 Subject: [PATCH 18/49] PEP8 F401 'getopt' imported but unused --- netflowlabeler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 3928dda..04acb1e 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -24,7 +24,6 @@ # Description # A tool to add labels in netflow files based on a configuration. Flow file include Zeek, Argus, and NFdump. Both in CSV and TSV -import getopt import sys import re import json From 34f461b857037b2a2a8779c00802456a10de9fad Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 19:34:38 +0100 Subject: [PATCH 19/49] PEP8 F523 '...'.format(...) has unused arguments --- netflowlabeler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 04acb1e..4b1301a 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -271,7 +271,7 @@ def getLabel(self, column_values): if allTrue: labelToReturn = (genericlabelToVerify, detailedlabelToVerify) if args.debug > 0: - print('\tNew label assigned: {0}'.format(genericlabelToVerify, detailedlabelToVerify)) + print(f'\tNew label assigned: {genericlabelToVerify} {detailedlabelToVerify}') if args.verbose > 0: if 'Background' in labelToReturn: From b173471cc1d4ea71a979077139e187ed8da65bf5 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 23:21:50 +0100 Subject: [PATCH 20/49] Use sys.exit instead of exit --- netflowlabeler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 4b1301a..41d9b8b 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -78,7 +78,7 @@ def addCondition(self, condition): print(type(inst)) # the exception instance print(inst.args) # arguments stored in .args print(inst) # __str__ allows args to printed directly - exit(-1) + sys.exit(-1) def getLabel(self, column_values): """ @@ -285,7 +285,7 @@ def getLabel(self, column_values): print(type(inst)) # the exception instance print(inst.args) # arguments stored in .args print(inst) # __str__ allows args to printed directly - exit(-1) + sys.exit(-1) def output_netflow_line_to_file(outputfile, originalline, filetype='', genericlabel='', detailedlabel=''): @@ -325,7 +325,7 @@ def output_netflow_line_to_file(outputfile, originalline, filetype='', genericla print(type(inst)) # the exception instance print(inst.args) # arguments stored in .args print(inst) # __str__ allows args to printed directly - exit(-1) + sys.exit(-1) def process_nfdump(f, headers, labelmachine): @@ -1237,7 +1237,7 @@ def process_netflow(labelmachine): print(type(inst)) # the exception instance print(inst.args) # arguments stored in .args print(inst) # __str__ allows args to printed directly - exit(-1) + sys.exit(-1) # ---- Define the type of file headerline = input_file.readline() From 7c6852b12adfc081d62aac785877198b86b57711 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 23:24:02 +0100 Subject: [PATCH 21/49] C0325: Unnecessary parens after 'while' keyword (superfluous-parens) --- netflowlabeler.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 41d9b8b..1ef85cb 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -404,7 +404,7 @@ def process_nfdump(f, headers, labelmachine): # Read the second line to start processing line = f.readline() amount_lines_processed += 1 - while (line): + while line: if args.verbose > 0: print('Netflow line: {0}'.format(line), end=' ') @@ -841,7 +841,7 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): while '#' in line: line = input_file.readline() - while (line): + while line: # Count the first line amount_lines_processed += 1 @@ -1016,7 +1016,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): # Read the second line to start processing line = f.readline() amount_lines_processed += 1 - while (line): + while line: if args.verbose > 0: print('Netflow line: {0}'.format(line), end=' ') @@ -1328,7 +1328,7 @@ def loadConditions(labelmachine): line = conf.readline().strip() conditions = {} - while (line): + while line: # Ignore comments if line.strip().find('#') == 0: line = conf.readline().strip() @@ -1343,7 +1343,7 @@ def loadConditions(labelmachine): # Now read all the conditions for this label line = conf.readline().strip() - while (line): + while line: if line.strip()[0] == '-': # Condition tempAndConditions = line.strip().split('-')[1] From fa62fd296c6c3633da272fdf3ceaf31df97d652c Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 23:26:04 +0100 Subject: [PATCH 22/49] Remove non-existent author email --- netflowlabeler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 1ef85cb..e2ca2d3 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -17,7 +17,7 @@ # # # Authors: -# Sebastian Garcia, sebastian.garcia@agents.fel.cvut.cz, sgarcia@exa.unicen.edu.ar, eldraco@gmail.com +# Sebastian Garcia, sebastian.garcia@agents.fel.cvut.cz, eldraco@gmail.com # Veronica Valeros, vero.valeros@gmail.com # Stratosphere Laboratory, Czech Technical University in Prague From da110e765d3579f871e59658d6691a7307e12102 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 23:30:07 +0100 Subject: [PATCH 23/49] C0301 long lines --- netflowlabeler.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index e2ca2d3..a801f5a 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -22,7 +22,8 @@ # Stratosphere Laboratory, Czech Technical University in Prague # Description -# A tool to add labels in netflow files based on a configuration. Flow file include Zeek, Argus, and NFdump. Both in CSV and TSV +# A tool to add labels in netflow files based on a configuration. +# Flow file include Zeek, Argus, and NFdump. Both in CSV and TSV import sys import re @@ -35,10 +36,9 @@ class labeler(): """ - This class handles the adding of new labeling conditions and the return of the lables - """ - conditionsGroup = [] - """ + This class handles the adding of new labeling conditions + and the return of the labels + conditionsGroup = [ {'Background': [ [ {'srcIP': 'all'} ] @@ -61,6 +61,7 @@ class labeler(): ] } ] """ + conditionsGroup = [] def addCondition(self, condition): """ @@ -107,9 +108,11 @@ def getLabel(self, column_values): if args.debug > 0: print('\t\tOr conditions group : {0}'.format(orConditions)) - # orConditions is an array. Each position of this array should be ORed with the next position + # orConditions is an array. + # Each position of this array should be ORed with the next position for andcondition in orConditions: - # If any of these andConditions groups is true, just return the label, because this for is an 'OR' + # If any of these andConditions groups is true, + # just return the label, because this for is an 'OR' if args.debug > 0: print('\t\tAnd condition group : {0}'.format(andcondition)) From 3a01b0c2f3f2a444ab2542fa5457e16d82b25216 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 23:31:36 +0100 Subject: [PATCH 24/49] C0301 long lines --- netflowlabeler.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index a801f5a..dc208dc 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -339,12 +339,15 @@ def process_nfdump(f, headers, labelmachine): # Just to monitor how many lines we read amount_lines_processed = 0 - # Parse the file into an array of dictionaries. We will use the columns names as dictionary keys + # Parse the file into an array of dictionaries. + # We will use the columns names as dictionary keys # Example: [ {'Date': '10/10/2013} , {'SrcIp':'1.1.1.1} , , ] netflowArray = [] columnDict = {} - # Replace the TABs for spaces, if it has them..., and replace the : in the ports to spaces also, and strip the \n, and the word flow + # Replace the TABs for spaces, if it has them..., + # and replace the : in the ports to spaces also, + # and strip the \n, and the word flow temp2 = headers.replace('flow', '') temp = re.sub('\s+', ' ', temp2).replace(':', ' ').strip() columnNames = temp.split(' ') @@ -480,7 +483,8 @@ def process_nfdump(f, headers, labelmachine): dict[columnName] = dstport netflowArray[7] = dict elif len(temp.split(':')) > 2: - # We are using ipv6! THIS DEPENDS A LOT ON THE program that created the netflow.. + # We are using ipv6! THIS DEPENDS A LOT ON THE + # program that created the netflow.. srcip = temp[0:temp.rfind(':')] # Store the value in the dict dict = netflowArray[4] @@ -767,7 +771,8 @@ def define_columns(headerline, filetype): def define_type(data): """ - Try to define very fast the type of input from :Zeek file, Suricata json, Argus binetflow CSV, Argus binetflow TSV + Try to define very fast the type of input from :Zeek file, + Suricata json, Argus binetflow CSV, Argus binetflow TSV Using a Heuristic detection Input: The first line after the headers if there were some, as 'data' Outputs types can be can be: zeek-json, suricata, argus-tab, argus-csv, zeek-tab From 25b6523c85b7bd1e0b4e28ae3d48c3353185e335 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 23:36:18 +0100 Subject: [PATCH 25/49] Add file main docstring --- netflowlabeler.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/netflowlabeler.py b/netflowlabeler.py index dc208dc..94ec13d 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -25,6 +25,10 @@ # A tool to add labels in netflow files based on a configuration. # Flow file include Zeek, Argus, and NFdump. Both in CSV and TSV +""" +netflowlabeler.py is a tool to add labels in netflow files based on a +configuration file. +""" import sys import re import json From c0c8114eecbf78aece5f02a8f3a779f7b6c50655 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Fri, 18 Nov 2022 23:48:34 +0100 Subject: [PATCH 26/49] C0209: Formatting a regular string which could be a f-string --- netflowlabeler.py | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 94ec13d..dd2a119 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -76,7 +76,7 @@ def addCondition(self, condition): self.conditionsGroup.append(condition) if args.debug > 0: - print('\tCondition added: {0}'.format(condition)) + print(f'\tCondition added: {condition}') except Exception as inst: print('Problem in addCondition() in class labeler') @@ -110,7 +110,7 @@ def getLabel(self, column_values): orConditions = group[labelline] if args.debug > 0: - print('\t\tOr conditions group : {0}'.format(orConditions)) + print(f'\t\tOr conditions group : {orConditions}') # orConditions is an array. # Each position of this array should be ORed with the next position @@ -118,13 +118,13 @@ def getLabel(self, column_values): # If any of these andConditions groups is true, # just return the label, because this for is an 'OR' if args.debug > 0: - print('\t\tAnd condition group : {0}'.format(andcondition)) + print(f'\t\tAnd condition group : {andcondition}') # With this we keep control of how each part of the and is going... allTrue = True for acond in andcondition: if args.debug > 0: - print('\t\t\tAnd this with : {0}'.format(acond)) + print(f'\t\t\tAnd this with : {acond}') condColumn = list(acond.keys())[0] condValue = acond[condColumn].lower() @@ -360,7 +360,7 @@ def process_nfdump(f, headers, labelmachine): addressType = '' # if args.debug > 0: - # print 'Columns names: {0}'.format(columnNames) + # print(f'Columns names: {columnNames}') for cN in columnNames: # Separate between src ip and dst ip @@ -401,8 +401,8 @@ def process_nfdump(f, headers, labelmachine): columnDict = {} # if args.debug > 0: - # print 'netflowArray' - # print netflowArray + # print('netflowArray') + # print(netflowArray) # Create the output file with the header outputfile = open(args.netflowFile+'.labeled', 'w+') @@ -416,7 +416,7 @@ def process_nfdump(f, headers, labelmachine): amount_lines_processed += 1 while line: if args.verbose > 0: - print('Netflow line: {0}'.format(line), end=' ') + print(f'Netflow line: {line}', end=' ') # Parse the columns # Strip and replace ugly stuff @@ -593,8 +593,8 @@ def process_nfdump(f, headers, labelmachine): netflowArray[13] = dict # if args.debug > 0: - # print date,hour,duration,protocol, srcip, srcport, dstip, dstport, flags, tos, packets, bytes, flows - # print netflowArray + # print(date,hour,duration,protocol, srcip, srcport, dstip, dstport, flags, tos, packets, bytes, flows) + # print(netflowArray) # Request a label genericlabel, detailedlabel = labelmachine.getLabel(netflowArray) @@ -606,7 +606,7 @@ def process_nfdump(f, headers, labelmachine): netflowArray[13] = dict # if args.debug > 0: - # print netflowArray + # print(netflowArray) # Ask to store the netflow output_netflow_line_to_file(outputfile, netflowArray) @@ -966,7 +966,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): columnNames = temp.split(' ') # if args.debug > 0: - # print 'Columns names: {0}'.format(columnNames) + # print(f'Columns names: {columnNames}') # So far argus does no have a column Date columnDict['Date'] = "" @@ -1030,7 +1030,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): amount_lines_processed += 1 while line: if args.verbose > 0: - print('Netflow line: {0}'.format(line), end=' ') + print(f'Netflow line: {line}', end=' ') # Parse the columns # Strip and replace ugly stuff @@ -1039,7 +1039,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): columnValues = temp.split(' ') # if args.debug > 0: - # print columnValues + # print(columnValues) # Date date = columnValues[0] @@ -1218,7 +1218,7 @@ def process_argus(column_idx, output_file, labelmachine, filetype): netflowArray[12] = dict # if args.debug > 0: - # print netflowArray + # print(netflowArray) # Ask to store the netflow output_netflow_line_to_file(output_file, netflowArray) @@ -1239,7 +1239,7 @@ def process_netflow(labelmachine): """ try: if args.verbose > 0: - print('[+] Processing the flow file {0}'.format(args.netflowFile)) + print(f'[+] Processing the flow file {args.netflowFile}') # Open flows file try: @@ -1307,7 +1307,7 @@ def process_netflow(labelmachine): # Close the outputfile output_file.close() - print('Amount of lines read: {0}'.format(amount_lines_processed)) + print(f'Amount of lines read: {amount_lines_processed}') except Exception as inst: exception_line = sys.exc_info()[2].tb_lineno @@ -1327,10 +1327,10 @@ def loadConditions(labelmachine): conf = open(args.configFile) # try: # if args.verbose > 0: - # print('Opening the configuration file \'{0}\''.format(args.configFile)) + # print(f'Opening the configuration file \'{args.configFile}\'') # conf = open(args.configFile) # except: - # print('The file \'{0}\' couldn\'t be opened.'.format(args.configFile)) + # print(f'The file \'{args.configFile}\' couldn\'t be opened.') # exit(1) if args.debug > 0: @@ -1350,7 +1350,7 @@ def loadConditions(labelmachine): if line.strip()[0] != '-': label = line.split(':')[0] # if args.debug > 0: - # print 'Label: {}'.format(label) + # print(f'Label: {label}') conditions[label] = [] # Now read all the conditions for this label @@ -1360,7 +1360,7 @@ def loadConditions(labelmachine): # Condition tempAndConditions = line.strip().split('-')[1] if args.debug > 1: - print('Condition: {}'.format(tempAndConditions)) + print(f'Condition: {tempAndConditions}') andConditions = [] for andCond in tempAndConditions.split('&'): tempdict = {} @@ -1389,7 +1389,7 @@ def loadConditions(labelmachine): if __name__ == '__main__': - print('NetFlow labeler. Version {}'.format(version)) + print(f'NetFlow labeler. Version {version}') print('https://stratosphereips.org') # Parse the parameters From 883954fa600d259026cc78eca5477107eb58b6c2 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sat, 19 Nov 2022 00:08:32 +0100 Subject: [PATCH 27/49] Format tool header name --- netflowlabeler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index dd2a119..b82771c 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1389,8 +1389,9 @@ def loadConditions(labelmachine): if __name__ == '__main__': - print(f'NetFlow labeler. Version {version}') - print('https://stratosphereips.org') + print(f'NetFlow Labeler v{version}') + print('Stratosphere Research Laboratory (https://stratosphereips.org)') + print() # Parse the parameters parser = argparse.ArgumentParser(description="Tool to label netflow files", add_help=False) From 0ad126cac3b0ef6b6984a1c376dcd10e71cb8b0d Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sat, 19 Nov 2022 00:10:15 +0100 Subject: [PATCH 28/49] Improve tool argparse description --- netflowlabeler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index b82771c..8d6ba82 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1394,7 +1394,7 @@ def loadConditions(labelmachine): print() # Parse the parameters - parser = argparse.ArgumentParser(description="Tool to label netflow files", add_help=False) + parser = argparse.ArgumentParser(description="A configurable rule-based labeling tool for network flow files", add_help=False) parser.add_argument('-c', '--configFile', metavar='', action='store', required=True, help='path to labeling rules configuration.') parser.add_argument('-v', '--verbose', metavar='', action='store', required=False, type=int, default=0, help='amount of verbosity. This shows more info about the results.') parser.add_argument('-d', '--debug', action='store', required=False, type=int, default=0, help='amount of debugging. This shows inner information about the program.') From 69d6d31013c8f8867186d31c928ee4f6ce840675 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sat, 19 Nov 2022 00:19:52 +0100 Subject: [PATCH 29/49] C0301 Lines too long on parser help --- netflowlabeler.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 8d6ba82..32a1f57 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1395,10 +1395,14 @@ def loadConditions(labelmachine): # Parse the parameters parser = argparse.ArgumentParser(description="A configurable rule-based labeling tool for network flow files", add_help=False) - parser.add_argument('-c', '--configFile', metavar='', action='store', required=True, help='path to labeling rules configuration.') - parser.add_argument('-v', '--verbose', metavar='', action='store', required=False, type=int, default=0, help='amount of verbosity. This shows more info about the results.') - parser.add_argument('-d', '--debug', action='store', required=False, type=int, default=0, help='amount of debugging. This shows inner information about the program.') - parser.add_argument('-f', '--netflowFile', metavar='', action='store', required=True, help='file to label.') + parser.add_argument('-c', '--configFile', metavar='', action='store', + required=True, help='path to labeling rules configuration.') + parser.add_argument('-v', '--verbose', metavar='', action='store', + required=False, type=int, default=0, help='set verbosity level.') + parser.add_argument('-d', '--debug', action='store', required=False, type=int, default=0, + help='amount of debugging. This shows inner information about the program.') + parser.add_argument('-f', '--netflowFile', metavar='', action='store', + required=True, help='file to label.') parser.add_argument("-h", "--help", action="help", help="command line help") args = parser.parse_args() From 1d4c6cfbe80c048dd880e448094a5617d62c0aff Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sat, 19 Nov 2022 00:23:08 +0100 Subject: [PATCH 30/49] Use argparse default help instead of self defined --- netflowlabeler.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 32a1f57..bfcd0a2 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1394,16 +1394,15 @@ def loadConditions(labelmachine): print() # Parse the parameters - parser = argparse.ArgumentParser(description="A configurable rule-based labeling tool for network flow files", add_help=False) + parser = argparse.ArgumentParser(description="A configurable rule-based labeling tool for network flow files") parser.add_argument('-c', '--configFile', metavar='', action='store', - required=True, help='path to labeling rules configuration.') + required=True, help='path to labeling configuration file.') + parser.add_argument('-f', '--netflowFile', metavar='', action='store', + required=True, help='file to label.') parser.add_argument('-v', '--verbose', metavar='', action='store', required=False, type=int, default=0, help='set verbosity level.') parser.add_argument('-d', '--debug', action='store', required=False, type=int, default=0, help='amount of debugging. This shows inner information about the program.') - parser.add_argument('-f', '--netflowFile', metavar='', action='store', - required=True, help='file to label.') - parser.add_argument("-h", "--help", action="help", help="command line help") args = parser.parse_args() try: From cf1ada823969c79ac15cab2f5805134971a3a500 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sat, 19 Nov 2022 00:30:03 +0100 Subject: [PATCH 31/49] Update argparse help for clarity --- netflowlabeler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index bfcd0a2..6453201 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1396,13 +1396,13 @@ def loadConditions(labelmachine): # Parse the parameters parser = argparse.ArgumentParser(description="A configurable rule-based labeling tool for network flow files") parser.add_argument('-c', '--configFile', metavar='', action='store', - required=True, help='path to labeling configuration file.') + required=True, help='path to labeling configuration.') parser.add_argument('-f', '--netflowFile', metavar='', action='store', - required=True, help='file to label.') - parser.add_argument('-v', '--verbose', metavar='', action='store', + required=True, help='path to the file to label.') + parser.add_argument('-v', '--verbose', action='store', required=False, type=int, default=0, help='set verbosity level.') parser.add_argument('-d', '--debug', action='store', required=False, type=int, default=0, - help='amount of debugging. This shows inner information about the program.') + help='set debugging level.') args = parser.parse_args() try: From 2e81d1664940e3eb3219aaf16d2bae3225109d84 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sat, 19 Nov 2022 00:32:22 +0100 Subject: [PATCH 32/49] Catch all other exceptions in __main__ --- netflowlabeler.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/netflowlabeler.py b/netflowlabeler.py index 6453201..ab7bf37 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1418,3 +1418,9 @@ def loadConditions(labelmachine): # CTRL-C pretty handling. print("Keyboard Interruption!. Exiting.") sys.exit(1) + except Exception as inst: + # Notify of any other exception + print('Exception in __main__') + print(type(inst)) # the exception instance + print(inst.args) # arguments stored in .args + print(inst) # __str__ allows args to printed directly From 6ca44b5bca8f110af816b344121888aee260757a Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sat, 19 Nov 2022 00:36:16 +0100 Subject: [PATCH 33/49] Fix incorrect English on labelling, using American english Labeling --- netflowlabeler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index ab7bf37..faa0008 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1320,7 +1320,7 @@ def process_netflow(labelmachine): def loadConditions(labelmachine): """ - Load the labelling conditions from a conf file + Load the labeling conditions from a conf file """ conditionsList = [] try: From 2f2f5727a4e3461ccf712c8eed647b467fe6df33 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sat, 19 Nov 2022 00:52:08 +0100 Subject: [PATCH 34/49] Added comments to loadConditions() for clarity --- netflowlabeler.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index faa0008..2039e02 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1336,12 +1336,17 @@ def loadConditions(labelmachine): if args.debug > 0: print('Loading the conditions from the configuration file ') - # Read the conf file + # Read the labeling configuration file line = conf.readline().strip() conditions = {} + # Process each line of the labeling configuration file + # There are three possible options here: + # - We read a comment: # + # - We read a label: does not start with symbols + # - We read a label condition: starts with '-' while line: - # Ignore comments + # Ignore comments marked with '#' if line.strip().find('#') == 0: line = conf.readline().strip() continue @@ -1356,11 +1361,15 @@ def loadConditions(labelmachine): # Now read all the conditions for this label line = conf.readline().strip() while line: + # If line starts with '-' is a condition if line.strip()[0] == '-': - # Condition + # Parse the condition tempAndConditions = line.strip().split('-')[1] if args.debug > 1: print(f'Condition: {tempAndConditions}') + + # Check if the condition is composed, + # e.g.: srcIP=xxx.xxx.xxx.xxx & dstPort=xx andConditions = [] for andCond in tempAndConditions.split('&'): tempdict = {} From 1d5f5e031dcf559408546a83a9f85b2d1092703d Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sat, 19 Nov 2022 18:47:45 +0100 Subject: [PATCH 35/49] Rename function loadConditions to load_conditions --- netflowlabeler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 2039e02..f853d85 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1318,7 +1318,7 @@ def process_netflow(labelmachine): exit(-1) -def loadConditions(labelmachine): +def load_conditions(labelmachine): """ Load the labeling conditions from a conf file """ @@ -1389,7 +1389,7 @@ def loadConditions(labelmachine): print("Keyboard Interruption!. Exiting.") sys.exit(1) except Exception as inst: - print('Problem in main() function at loadConditions ') + print('Problem in main() function at load_conditions ') print(type(inst)) # the exception instance print(inst.args) # arguments stored in .args print(inst) # __str__ allows args to printed directly @@ -1419,7 +1419,7 @@ def loadConditions(labelmachine): labelmachine = labeler() # Load conditions - loadConditions(labelmachine) + load_conditions(labelmachine) # Direct process of netflow flows process_netflow(labelmachine) From 60ef9275d6766b3c1f32f54d0f4f9a0f67d7f185 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sat, 19 Nov 2022 19:36:57 +0100 Subject: [PATCH 36/49] Add netflowlabeler high level diagram --- README.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/README.md b/README.md index 4d2efe8..39978ef 100644 --- a/README.md +++ b/README.md @@ -106,3 +106,26 @@ To mount your logs path to the container and automatically run the netflow label ```bash docker run -v /full/path/to/logs/:/netflowlabeler/data --rm -it stratosphereips/netflowlabeler:latest python3 netflowlabeler.py -c data/labels.config -f data/conn.log ``` + +# Netflow Labeler High Level Diagram + +```mermaid +flowchart LR; + NetFlow["Netflow File"]-->labeler; + Config["Labels Config"]-->labeler; + subgraph ONE["Interpret Input File"] + labeler-->load_conditions; + load_conditions-->process_netflow; + process_netflow-->define_type; + define_type-->define_columns; + end + subgraph TWO["Label NetFlow File"] + define_columns-.->process_argus; + define_columns-.->process_nfdump; + define_columns-->process_zeek; + process_argus-.->output_netflow_line_to_file; + process_nfdump-.->output_netflow_line_to_file; + process_zeek-->output_netflow_line_to_file; + end + output_netflow_line_to_file-->Output["Labeled NetFlow File"]; +``` From 876b4b9ba52230afc00d2043f587818218ddd50b Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sat, 19 Nov 2022 22:45:00 +0100 Subject: [PATCH 37/49] Wrap process_netflow docstring shorter --- netflowlabeler.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index f853d85..bc09ae0 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1235,7 +1235,9 @@ def process_argus(column_idx, output_file, labelmachine, filetype): def process_netflow(labelmachine): """ - This function takes the flowFile and parse it. Then it ask for a label and finally it calls a function to store the netflow in a file + This function takes the flowFile and parse it. + Then it asks for a label and finally it calls + a function to store the netflow in a file. """ try: if args.verbose > 0: From 69e787c2101c47f7a2ca7f6fac3feb827902618b Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sat, 19 Nov 2022 22:45:28 +0100 Subject: [PATCH 38/49] Remove unnecessary commented debug lines --- netflowlabeler.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index bc09ae0..3789ca0 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1327,13 +1327,6 @@ def load_conditions(labelmachine): conditionsList = [] try: conf = open(args.configFile) - # try: - # if args.verbose > 0: - # print(f'Opening the configuration file \'{args.configFile}\'') - # conf = open(args.configFile) - # except: - # print(f'The file \'{args.configFile}\' couldn\'t be opened.') - # exit(1) if args.debug > 0: print('Loading the conditions from the configuration file ') From 2233feb33f14486f78330703da3605db331f6947 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sat, 19 Nov 2022 22:46:18 +0100 Subject: [PATCH 39/49] Improve load_conditions docstring --- netflowlabeler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 3789ca0..5c0af8c 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1322,7 +1322,8 @@ def process_netflow(labelmachine): def load_conditions(labelmachine): """ - Load the labeling conditions from a conf file + Load the labeling conditions from a configuration file. + Input: labelmachine is a labeler object """ conditionsList = [] try: From 377535d6b36cfbf59857586ac266f3006e2d3cd9 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Mon, 21 Nov 2022 13:50:11 +0100 Subject: [PATCH 40/49] Add more comments to the code --- netflowlabeler.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 5c0af8c..06ee146 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1324,6 +1324,7 @@ def load_conditions(labelmachine): """ Load the labeling conditions from a configuration file. Input: labelmachine is a labeler object + Output: modified labelmachine object. No return instruction. """ conditionsList = [] try: @@ -1376,6 +1377,7 @@ def load_conditions(labelmachine): line = conf.readline().strip() else: + # Finished reading all conditions for a given label break labelmachine.addCondition(conditions) conditions = {} @@ -1414,7 +1416,7 @@ def load_conditions(labelmachine): # Create an instance of the labeler labelmachine = labeler() - # Load conditions + # Load labeling conditions from config file load_conditions(labelmachine) # Direct process of netflow flows From b26b42b9d69ffe15ad2d9b1f0c7375fde6e65c82 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Mon, 21 Nov 2022 14:54:14 +0100 Subject: [PATCH 41/49] Improve process_netflow parsing of valid headers --- netflowlabeler.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 06ee146..2932c31 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1247,18 +1247,23 @@ def process_netflow(labelmachine): try: input_file = open(args.netflowFile, 'r') except Exception as inst: - print('Some problem opening the input netflow file. In process_netflow()') + print('[!] Error in process_netflow: cannot open the input netflow file.') print(type(inst)) # the exception instance print(inst.args) # arguments stored in .args print(inst) # __str__ allows args to printed directly sys.exit(-1) - # ---- Define the type of file + # Define the type of file headerline = input_file.readline() - # If there are no headers, get out. Most start with '#' but Argus starts with 'StartTime' and nfdump with 'Date' - if '#' not in headerline[0] and 'Date' not in headerline and 'StartTime' not in headerline and 'ts' not in headerline and 'timestamp' not in headerline: - print('The file has not headers. Please add them.') + # If there are no headers, do not process the file: + # - Zeek headers start with '#' + # - Argus headers start with 'StartTime' + # - nfdump headers start with 'Date' + #if '#' not in headerline[0] and 'Date' not in headerline and 'StartTime' not in headerline and 'ts' not in headerline and 'timestamp' not in headerline: + header_keywords = ['#', 'Date', 'StarTime', 'ts', 'timestamp'] + if not any(headerline.startswith(keyword) for keyword in header_keywords): + print('[!] Error in process_netflow: the input netflow file has not headers.') sys.exit(-1) filetype = define_type(headerline) From 39dabe7b60cd0d63f737f8644b115ce1a65d883b Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Mon, 21 Nov 2022 15:08:15 +0100 Subject: [PATCH 42/49] Add process_netflow more verbose output --- netflowlabeler.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 2932c31..a236e73 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1260,18 +1260,22 @@ def process_netflow(labelmachine): # - Zeek headers start with '#' # - Argus headers start with 'StartTime' # - nfdump headers start with 'Date' - #if '#' not in headerline[0] and 'Date' not in headerline and 'StartTime' not in headerline and 'ts' not in headerline and 'timestamp' not in headerline: + # if '#' not in headerline[0] and 'Date' not in headerline and 'StartTime' not in headerline and 'ts' not in headerline and 'timestamp' not in headerline: header_keywords = ['#', 'Date', 'StarTime', 'ts', 'timestamp'] if not any(headerline.startswith(keyword) for keyword in header_keywords): print('[!] Error in process_netflow: the input netflow file has not headers.') sys.exit(-1) + # Attempt to automatically identify the type of file + # from the header of the netflow file filetype = define_type(headerline) if args.verbose > 0: - print(f'[+] Type of flow file to label: {filetype}') + print(f'[+] The input netflow file to label was identified as: {filetype}') # Create the output file for all cases output_file = open(args.netflowFile+'.labeled', 'w+') + if args.verbose > 0: + print(f"[+] The netflow file labeled can be found at: {args.netflowFile+'.labeled'}") # Store the headers in the output file output_netflow_line_to_file(output_file, headerline) From dbf98f21cfb7e54c9cdadb355adc7bbc47351c6f Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Mon, 21 Nov 2022 15:18:16 +0100 Subject: [PATCH 43/49] Add process_netflow more comments and format error msgs --- netflowlabeler.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index a236e73..280b39b 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1272,7 +1272,7 @@ def process_netflow(labelmachine): if args.verbose > 0: print(f'[+] The input netflow file to label was identified as: {filetype}') - # Create the output file for all cases + # Create the output file to store the labeled netflows output_file = open(args.netflowFile+'.labeled', 'w+') if args.verbose > 0: print(f"[+] The netflow file labeled can be found at: {args.netflowFile+'.labeled'}") @@ -1280,7 +1280,8 @@ def process_netflow(labelmachine): # Store the headers in the output file output_netflow_line_to_file(output_file, headerline) - # ---- Define the columns + # Define the columns based on the type of the input netflow file + # and call the labeler function based on the detected type if filetype == 'zeek-json': column_idx = define_columns(headerline, filetype='json') amount_lines_processed = 0 @@ -1314,19 +1315,21 @@ def process_netflow(labelmachine): elif filetype == 'nfdump-tab': column_idx = define_columns(headerline, filetype='tab') amount_lines_processed = process_nfdump(column_idx, input_file, output_file, headerline, labelmachine) + else: + print(f"[!] Error in process_netflow: filetype not supported {filetype}") # Close the outputfile output_file.close() - print(f'Amount of lines read: {amount_lines_processed}') + print(f"[+] Labeling completed. Total number of flows read: {amount_lines_processed}") except Exception as inst: exception_line = sys.exc_info()[2].tb_lineno - print(f'Problem in process_netflow() line {exception_line}', 0, 1) + print(f'[!] Error in process_netflow() line {exception_line}', 0, 1) print(type(inst)) # the exception instance print(inst.args) # arguments stored in .args print(inst) # __str__ allows args to printed directly - exit(-1) + sys.exit(-1) def load_conditions(labelmachine): From 001f077fa870424204246c1e1b2d8c31c185f70e Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Mon, 21 Nov 2022 15:24:15 +0100 Subject: [PATCH 44/49] process_netflow: close input file for all types of files processed --- netflowlabeler.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 280b39b..56b5f55 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -1304,21 +1304,26 @@ def process_netflow(labelmachine): if '#fields' in headerline: fields_headerline = headerline headerline = input_file.readline() + # Store the rest of the zeek headers in the output file output_netflow_line_to_file(output_file, headerline, filetype='tab') + # Get the columns indexes column_idx = define_columns(fields_headerline, filetype='tab') + # Process the whole file amount_lines_processed = process_zeek(column_idx, input_file, output_file, labelmachine, filetype='tab') - # Close the netflow file - input_file.close() + elif filetype == 'nfdump-tab': column_idx = define_columns(headerline, filetype='tab') amount_lines_processed = process_nfdump(column_idx, input_file, output_file, headerline, labelmachine) else: print(f"[!] Error in process_netflow: filetype not supported {filetype}") - # Close the outputfile + # Close the input file + input_file.close() + + # Close the output file output_file.close() print(f"[+] Labeling completed. Total number of flows read: {amount_lines_processed}") From a607aee23b4b596944e18a9930e4375dbd42092f Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Mon, 21 Nov 2022 17:28:45 +0100 Subject: [PATCH 45/49] Increase verbosity level required to print flows and final labels --- netflowlabeler.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 56b5f55..2e19790 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -280,7 +280,7 @@ def getLabel(self, column_values): if args.debug > 0: print(f'\tNew label assigned: {genericlabelToVerify} {detailedlabelToVerify}') - if args.verbose > 0: + if args.verbose > 1: if 'Background' in labelToReturn: print(f'\tFinal label assigned: {labelToReturn}') else: @@ -846,6 +846,9 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): amount_lines_processed = 0 column_values = {} + if args.verbose > 0: + print(f'[+] Labeling the flow file {args.netflowFile}') + # Read firstlines line = input_file.readline() @@ -853,11 +856,12 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): while '#' in line: line = input_file.readline() + # Process each flow in input file while line: # Count the first line amount_lines_processed += 1 - if args.verbose > 0: + if args.verbose > 1: print(f'Netflow line: {line}', end='') if 'csv' in filetype or 'tab' in filetype: From 959895ce7e1eda3badca72258f5a77d4d37df3e7 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Mon, 21 Nov 2022 17:43:44 +0100 Subject: [PATCH 46/49] Add process_zeek more comments and format error msgs --- netflowlabeler.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 2e19790..c0e9c88 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -839,8 +839,11 @@ def define_type(data): def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): """ - Process a Zeek file - The filetype can be: 'tab', 'csv', 'json' + Process and label a Zeek file using the label configuration. + Zeek files can have three distinct field separators: + - 'tab': currently supported + - 'csv': currently supported + - 'json': not implemented yet """ try: amount_lines_processed = 0 @@ -858,18 +861,21 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): # Process each flow in input file while line: - # Count the first line + # Count the flows processed amount_lines_processed += 1 if args.verbose > 1: print(f'Netflow line: {line}', end='') + # Zeek files can be in csv, tab or JSON format + # Labeling CSV and TAB uses the same method if 'csv' in filetype or 'tab' in filetype: # Work with csv and tabs if 'csv' in filetype: separator = ',' elif 'tab' in filetype: separator = '\t' + # Transform the line into an array line_values = line.split(separator) @@ -883,7 +889,7 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): column_values['pkts'] = '' column_values['ipbytes'] = '' - # Sum bytes + # bytes: total bytes. Calculated as the SUM of sbytes and dbytes # We do it like this because sometimes the column can be - or 0 if column_values['sbytes'] == '-': sbytes = 0 @@ -896,7 +902,7 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): column_values['bytes'] = str(sbytes + dbytes) # print(f'New column bytes = {column_values["bytes"]}') - # Sum packets + # pkts: total packets. Calculated as the SUM of spkts and dpkts # We do it like this because sometimes the column can be - or 0 if column_values['spkts'] == '-': spkts = 0 @@ -909,7 +915,8 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): column_values['pkts'] = str(spkts + dpkts) # print(f'New column pkst = {column_values["pkts"]}') - # Sum ip_bytes + # ipbytes: total transferred bytes. + # Calculated as the SUM of orig_ip_bytes and resp_ip_bytes. # We do it like this because sometimes the column can be - or 0 if column_values['orig_ip_bytes'] == '-': sip_bytes = 0 @@ -930,6 +937,7 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): # Store the netflow output_netflow_line_to_file(output_file, line, filetype, genericlabel=genericlabel, detailedlabel=detailedlabel) + # Read next flow line ignoring comments line = input_file.readline() while '#' in line: line = input_file.readline() @@ -939,10 +947,11 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): amount_lines_processed += 1 pass + # Returned number of labeled flows return amount_lines_processed except Exception as inst: exception_line = sys.exc_info()[2].tb_lineno - print(f'\tProblem in process_zeek() line {exception_line}', 0, 1) + print(f'\t[!] Error in process_zeek(): exception in line {exception_line}', 0, 1) print(str(type(inst)), 0, 1) print(str(inst), 0, 1) sys.exit(1) From 8339523c928dcb26904015b3c1781042417d2eab Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Mon, 21 Nov 2022 18:02:27 +0100 Subject: [PATCH 47/49] Add getLabel more comments and format errors msgs --- netflowlabeler.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index c0e9c88..ff62180 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -79,7 +79,7 @@ def addCondition(self, condition): print(f'\tCondition added: {condition}') except Exception as inst: - print('Problem in addCondition() in class labeler') + print('[!] Error in class labeler addCondition(): unable to add a condition') print(type(inst)) # the exception instance print(inst.args) # arguments stored in .args print(inst) # __str__ allows args to printed directly @@ -87,8 +87,13 @@ def addCondition(self, condition): def getLabel(self, column_values): """ - Get the values of the columns of a netflow line and return a label - Input: column_values is a dict, where each key is the standard field in a netflow + Get the values of the columns of a netflow line, + matche the labels conditions, and return a label. + + Input: + - column_values is a dict, where each key is the standard field in a netflow + Output: + - labelToReturn: return a tuple containing a generic and detailed label """ try: # Default to empty genericlabel and detailedlabel @@ -97,8 +102,11 @@ def getLabel(self, column_values): # Process all the conditions for group in self.conditionsGroup: # The first key of the group is the label to put + # Example: {'Botnet-SPAM': [[{'Proto': 'TCP'}, {'srcPort': '25'}], [{'Proto': 'TCP'}, {'dstPort': '25'}]]} labelline = list(group.keys())[0] genericlabelToVerify = labelline.split(',')[0].strip() + + # The detailed label may not be there, try to obtain it try: detailedlabelToVerify = labelline.split(',')[1].strip() except IndexError: @@ -106,7 +114,7 @@ def getLabel(self, column_values): detailedlabelToVerify = '(empty)' if args.debug > 0: - print(f'\tLabel to verify {labelline}') + print(f'\tLabel to verify {labelline}: {genericlabelToVerify} {detailedlabelToVerify}') orConditions = group[labelline] if args.debug > 0: @@ -288,7 +296,8 @@ def getLabel(self, column_values): return labelToReturn except Exception as inst: - print('Problem in getLabel() in class labeler') + print('Error in class labeler getLabel(): unable to label the given column values') + print(column_values) print(type(inst)) # the exception instance print(inst.args) # arguments stored in .args print(inst) # __str__ allows args to printed directly From e5de6c224ddc21d7ae33ca837687daa6a36dadf4 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Mon, 21 Nov 2022 18:28:46 +0100 Subject: [PATCH 48/49] Improve search of numeric columns. Fixes #9 --- netflowlabeler.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index ff62180..36c66f9 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -159,7 +159,11 @@ def getLabel(self, column_values): # Normal condition, no negation # Is the column a number? - if ('bytes' in condColumn) or ('packets' in condColumn) or ('srcport' in condColumn) or ('dstport' in condColumn) or ('sbytes' in condColumn) or ('dbyets' in condColumn) or ('spkts' in condColumn) or ('dpkts' in condColumn) or ('ip_orig_bytes' in condColumn) or ('ip_resp_bytes' in condColumn): + # if ('bytes' in condColumn) or ('packets' in condColumn) or ('srcport' in condColumn) or ('dstport' in condColumn) or ('sbytes' in condColumn) or ('dbyets' in condColumn) or ('spkts' in condColumn) or ('dpkts' in condColumn) or ('ip_orig_bytes' in condColumn) or ('ip_resp_bytes' in condColumn): + column_num_keywords = ['bytes', 'packets', 'srcport', 'dstport', + 'sbytes', 'dbytes', 'spkts', 'dpkts', + 'ip_orig_bytes', 'ip_resp_bytes'] + if any(keyword in condColumn for keyword in column_num_keywords): # It is a colum that we can treat as a number # Find if there is <, > or = in the condition if '>' in condColumn[-1]: @@ -167,6 +171,7 @@ def getLabel(self, column_values): netflowValue = column_values[condColumn] if args.debug > 0: print(f'\t\tTo compare field: {condColumn}, Condition value: {condValue}, Netflow value: {netflowValue}') + # Pay attention to directionality of condition 'condValue < flowvalue' if (int(condValue) < int(netflowValue)) or (condValue == 'all'): allTrue = True @@ -263,8 +268,9 @@ def getLabel(self, column_values): print('\t\t\tFalse') allTrue = False break + + # It is not a colum that we can treat as a number else: - # It is not a colum that we can treat as a number netflowValue = column_values[condColumn] if (condValue == netflowValue) or (condValue == 'all'): netflowValue = column_values[condColumn] @@ -293,10 +299,11 @@ def getLabel(self, column_values): print(f'\tFinal label assigned: {labelToReturn}') else: print(f'\tFinal label assigned: \x1b\x5b1;31;40m{labelToReturn}\x1b\x5b0;0;40m') + return labelToReturn except Exception as inst: - print('Error in class labeler getLabel(): unable to label the given column values') + print('[!] Error in class labeler getLabel(): unable to label the given column values') print(column_values) print(type(inst)) # the exception instance print(inst.args) # arguments stored in .args From 4ceab4eeb3a06009a3a71d987b04ebcd0f49b821 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Mon, 21 Nov 2022 19:02:47 +0100 Subject: [PATCH 49/49] E127 fix identation of line wrap --- netflowlabeler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/netflowlabeler.py b/netflowlabeler.py index 36c66f9..b503957 100755 --- a/netflowlabeler.py +++ b/netflowlabeler.py @@ -161,8 +161,8 @@ def getLabel(self, column_values): # Is the column a number? # if ('bytes' in condColumn) or ('packets' in condColumn) or ('srcport' in condColumn) or ('dstport' in condColumn) or ('sbytes' in condColumn) or ('dbyets' in condColumn) or ('spkts' in condColumn) or ('dpkts' in condColumn) or ('ip_orig_bytes' in condColumn) or ('ip_resp_bytes' in condColumn): column_num_keywords = ['bytes', 'packets', 'srcport', 'dstport', - 'sbytes', 'dbytes', 'spkts', 'dpkts', - 'ip_orig_bytes', 'ip_resp_bytes'] + 'sbytes', 'dbytes', 'spkts', 'dpkts', + 'ip_orig_bytes', 'ip_resp_bytes'] if any(keyword in condColumn for keyword in column_num_keywords): # It is a colum that we can treat as a number # Find if there is <, > or = in the condition