Skip to content

Commit

Permalink
Testing anomaly detection with tensorflow
Browse files Browse the repository at this point in the history
  • Loading branch information
Matthias Niedermaier committed Dec 3, 2024
1 parent 08a7881 commit 8e07bc5
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/raspberry/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8

RUN python3 -m pip install pymodbus flask asyncua
RUN python3 -m pip install pymodbus flask asyncua tensorflow pandas numpy pyshark

RUN echo "ALL ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/nopasswd

Expand Down
7 changes: 4 additions & 3 deletions .devcontainer/virtual/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,17 @@ RUN apt-get update && apt-get install -y \
locales \
bc \
iputils-ping \
wget \
&& rm -rf /var/lib/apt/lists/*
wget

RUN DEBIAN_FRONTEND=noninteractive apt-get install -y tshark

RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen \
&& locale-gen
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8

RUN python3 -m pip install pymodbus flask asyncua nicegui
RUN python3 -m pip install pymodbus flask asyncua nicegui tensorflow pandas numpy pyshark scikit-learn

RUN install -d -m 0755 /etc/apt/keyrings && \
wget -q https://packages.mozilla.org/apt/repo-signing-key.gpg -O- > /etc/apt/keyrings/packages.mozilla.org.asc && \
Expand Down
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ coverage.xml
.pytest_cache/
cover/

# do not push network captures / pcaps
*.pcap

# do not push trained models
*.keras

# do not push database
historian.sqlite
Expand Down
Binary file added training/anomaly_detection/modbus_tf_model.keras
Binary file not shown.
Binary file not shown.
110 changes: 110 additions & 0 deletions training/anomaly_detection/train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import pyshark
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam


def parse_modbus_layer_fields(pcap_file):
    """
    Parse a PCAP file and extract all fields dynamically from the Modbus layer.

    Args:
        pcap_file: Path of the capture file passed to ``pyshark.FileCapture``.

    Returns:
        A ``pandas.DataFrame`` with one row per packet that contains a
        ``MODBUS`` layer and one column per field name found in those layers.
        The frame is empty when no such packet is present.
    """
    print(f"Parsing Modbus TCP traffic from PCAP file: {pcap_file}")
    capture = pyshark.FileCapture(pcap_file, display_filter="modbus")
    traffic_data = []

    try:
        for packet in capture:
            if 'MODBUS' not in packet:
                continue
            modbus_layer = packet['MODBUS']
            # Collect every field the layer advertises; a field that cannot
            # be read is recorded as None instead of aborting the packet.
            traffic_data.append({
                field: getattr(modbus_layer, field, None)
                for field in modbus_layer.field_names
            })
    finally:
        # Close the capture even if parsing fails part-way through, so the
        # capture is not left open on error.
        capture.close()

    return pd.DataFrame(traffic_data)


def preprocess_data(data):
    """
    Preprocess Modbus TCP data for TensorFlow.

    Every column is converted to numbers (values that cannot be parsed, and
    missing values, become 0) and then min-max scaled to the range [0, 1].

    Args:
        data: ``pandas.DataFrame`` of raw field values. It is not modified.

    Returns:
        A new ``pandas.DataFrame`` with the same columns, scaled per column.
        A column whose values are all identical is mapped to 0 rather than
        NaN.
    """
    # errors='coerce' turns unparsable entries into NaN, which are then
    # zero-filled together with values that were missing from the start.
    numeric = data.apply(pd.to_numeric, errors='coerce').fillna(0)

    col_min = numeric.min()
    col_range = numeric.max() - col_min
    # A constant column has a range of 0; dividing by it would fill the
    # column with NaN. Use 1 instead so such columns become all zeros.
    col_range = col_range.replace(0, 1)

    return (numeric - col_min) / col_range


def build_and_train_model(data, epochs=50, batch_size=32,
                          model_path="modbus_tf_model.keras"):
    """
    Build and train a TensorFlow model on the Modbus TCP dataset.

    Args:
        data: ``pandas.DataFrame`` of numeric features, one row per packet.
            It is not modified; an existing ``label`` column is ignored.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used during training.
        model_path: File the trained model is saved to.

    Returns:
        The trained Keras model.

    NOTE(review): every sample is labelled 0 (non-malicious), so the
    classifier only ever sees one class and the reported accuracy says
    nothing about its ability to detect anomalies.
    """
    # Build features and labels without adding a column to the caller's frame.
    features = data.drop(columns=["label"], errors="ignore")
    X = features.values
    y = np.zeros(len(features), dtype="int64")  # 0 = non-malicious

    # Split dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Build the TensorFlow model. The input shape is declared with an
    # explicit Input object rather than the `input_dim` layer argument.
    model = Sequential([
        tf.keras.Input(shape=(X_train.shape[1],)),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dropout(0.2),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),  # Binary classification
    ])

    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Train the model
    model.fit(X_train, y_train, validation_data=(X_test, y_test),
              epochs=epochs, batch_size=batch_size, verbose=2)

    # Evaluate the model
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

    # Save the trained model
    model.save(model_path)
    print(f"TensorFlow model saved as {model_path}")

    return model


if __name__ == "__main__":
    import sys

    # The PCAP path may be given as the first command-line argument;
    # without one the default capture name is used.
    pcap_file = sys.argv[1] if len(sys.argv) > 1 else "modbus_traffic_regular.pcap"

    # Step 1: Extract Modbus TCP traffic
    modbus_data = parse_modbus_layer_fields(pcap_file)

    if modbus_data.empty:
        print("No Modbus TCP traffic found in the PCAP file.")
        sys.exit(0)

    print("Extracted Modbus Data:")
    print(modbus_data.head())

    # Step 2: Preprocess the data
    preprocessed_data = preprocess_data(modbus_data)

    # Step 3: Build and train the TensorFlow model
    build_and_train_model(preprocessed_data)
11 changes: 9 additions & 2 deletions training/detect_basic/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,15 @@ Common techniques include:
Detecting and mitigating network scanning on industrial systems is a critical first step in preventing cyberattacks.
By capturing and analyzing network traffic through PCAP files, security teams can identify early signs of an attack and take appropriate action before an adversary gains deeper access to the system.

Which ports do the attacker scan?
Which ports are open, and which are closed?
***Questions:***
* Which ports does the attacker scan?
* Which ports are open, and which are closed?

***!!! Execute the Python script without looking into it !!!***

```sh
python3 recon.py <DEVICE_IP>
```

<details>
<summary><strong><span style="color:orange;font-weight: 900">Solution</span></strong></summary>
Expand Down

0 comments on commit 8e07bc5

Please sign in to comment.