Merge branch 'develop' into data_source_improvements_2
Patrick Bareiss committed Feb 7, 2025
2 parents 7a6dcfc + 3eaad9a commit e0d3694
Showing 2,345 changed files with 64,927 additions and 51,353 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/appinspect.yml
@@ -18,7 +18,13 @@ jobs:

       - name: Install Python Dependencies and ContentCTL and Atomic Red Team
         run: |
-          pip install contentctl>=4.0.0
+          if [ -n "${{ vars.CONTENTCTL_VERSION }}" ]; then
+            echo "Installing contentctl version ${{ vars.CONTENTCTL_VERSION }}"
+            pip install contentctl==${{ vars.CONTENTCTL_VERSION }}
+          else
+            echo "Installing latest contentctl version"
+            pip install contentctl
+          fi
           git clone --depth=1 --single-branch --branch=master https://github.com/redcanaryco/atomic-red-team.git external_repos/atomic-red-team
           git clone --depth=1 --single-branch --branch=master https://github.com/mitre/cti external_repos/cti
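Note: the pinned version comes from the repository-level Actions variable `CONTENTCTL_VERSION`; when it is unset, the latest contentctl release from PyPI is installed. Assuming the GitHub CLI is available, a maintainer could pin a version with a command like `gh variable set CONTENTCTL_VERSION --body "4.4.7"` (the version number here is only a placeholder). The same install block recurs in build.yml and unit-testing.yml below.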
37 changes: 0 additions & 37 deletions .github/workflows/auto-update.yml

This file was deleted.

8 changes: 7 additions & 1 deletion .github/workflows/build.yml
@@ -19,7 +19,13 @@ jobs:

       - name: Install Python Dependencies and ContentCTL and Atomic Red Team
         run: |
-          pip install contentctl>=4.0.0
+          if [ -n "${{ vars.CONTENTCTL_VERSION }}" ]; then
+            echo "Installing contentctl version ${{ vars.CONTENTCTL_VERSION }}"
+            pip install contentctl==${{ vars.CONTENTCTL_VERSION }}
+          else
+            echo "Installing latest contentctl version"
+            pip install contentctl
+          fi
           git clone --depth=1 --single-branch --branch=master https://github.com/redcanaryco/atomic-red-team.git external_repos/atomic-red-team
           git clone --depth=1 --single-branch --branch=master https://github.com/mitre/cti external_repos/cti
8 changes: 7 additions & 1 deletion .github/workflows/unit-testing.yml
@@ -23,7 +23,13 @@ jobs:

       - name: Install Python Dependencies and ContentCTL
         run: |
           python -m pip install --upgrade pip
-          pip install contentctl>=4.0.0
+          if [ -n "${{ vars.CONTENTCTL_VERSION }}" ]; then
+            echo "Installing contentctl version ${{ vars.CONTENTCTL_VERSION }}"
+            pip install contentctl==${{ vars.CONTENTCTL_VERSION }}
+          else
+            echo "Installing latest contentctl version"
+            pip install contentctl
+          fi
       # Running contentctl test with a few arguments, before running the command make sure you checkout into the current branch of the pull request. This step only performs unit testing on all the changes against the target-branch. In most cases this target branch will be develop
       # Make sure we check out the PR, even if it actually lives in a fork
7 changes: 1 addition & 6 deletions baselines/baseline_of_blocked_outbound_traffic_from_aws.yml
@@ -4,7 +4,7 @@ version: 1
 date: '2018-05-07'
 author: Bhavin Patel, Splunk
 type: Baseline
-datamodel: []
+status: production
 description: This search establishes, on a per-hour basis, the average and the standard
   deviation of the number of outbound connections blocked in your VPC flow logs by
   each source IP address (IP address of your EC2 instances). Also recorded is the

@@ -34,9 +34,4 @@ tags:
   - Splunk Enterprise
   - Splunk Enterprise Security
   - Splunk Cloud
-  required_fields:
-  - _time
-  - action
-  - src_ip
-  - dest_ip
   security_domain: network
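The body of this baseline's search is collapsed in the diff above. As a rough illustration of the per-hour average/standard-deviation pattern the description refers to (not the file's actual search), a minimal SPL sketch over CIM Network_Traffic fields might look like this, with a hypothetical lookup name:

    | tstats count as blocked_connections from datamodel=Network_Traffic
        where All_Traffic.action=blocked by All_Traffic.src_ip _time span=1h
    | stats avg(blocked_connections) as avg_blocked stdev(blocked_connections) as stdev_blocked
        by All_Traffic.src_ip
    | outputlookup hourly_blocked_outbound_traffic_baseline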
@@ -4,8 +4,7 @@ version: 1
 date: '2020-09-07'
 author: David Dorsey, Splunk
 type: Baseline
-datamodel:
-- Change
+status: production
 description: This search is used to build a Machine Learning Toolkit (MLTK) model
   for how many API calls are performed by each user. By default, the search uses the
   last 90 days of data to build the model and the model is rebuilt weekly. The model

@@ -40,14 +39,10 @@ tags:
   - Splunk Enterprise
   - Splunk Enterprise Security
   - Splunk Cloud
-  required_fields:
-  - _time
-  - All_Changes.user
-  - All_Changes.status
   security_domain: network
 deployment:
   scheduling:
     cron_schedule: 0 2 * * 0
     earliest_time: -90d@d
     latest_time: -1d@d
-    schedule_window: auto
+    schedule_window: auto
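No consuming detection appears in this diff; purely as an illustration, a per-user hourly count could be scored against such a model with MLTK's apply command. The model name below is hypothetical, and IsOutlier(<field>) follows MLTK's DensityFunction output convention:

    | tstats count as api_calls from datamodel=Change where All_Changes.status=success
        by All_Changes.user _time span=1h
    | apply cloud_excessive_api_calls_v1
    | where 'IsOutlier(api_calls)' = 1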
17 changes: 5 additions & 12 deletions baselines/baseline_of_cloud_instances_destroyed.yml
@@ -4,8 +4,7 @@ version: 1
 date: '2020-08-25'
 author: David Dorsey, Splunk
 type: Baseline
-datamodel:
-- Change
+status: production
 description: This search is used to build a Machine Learning Toolkit (MLTK) model
   for how many instances are destroyed in the environment. By default, the search
   uses the last 90 days of data to build the model and the model is rebuilt weekly.

@@ -20,17 +19,16 @@ search: '| tstats count as instances_destroyed from datamodel=Change where All_C
   <= 5, 0, 1) | table _time instances_destroyed, HourOfDay, isWeekend | fit DensityFunction
   instances_destroyed by "HourOfDay,isWeekend" into cloud_excessive_instances_destroyed_v1
   dist=expon show_density=true'
-how_to_implement: 'You must have Enterprise Security 6.0 or later, if not you will
+how_to_implement: "You must have Enterprise Security 6.0 or later, if not you will
   need to verify that the Machine Learning Toolkit (MLTK) version 4.2 or later is
   installed, along with any required dependencies. Depending on the number of users
   in your environment, you may also need to adjust the value for max_inputs in the
   MLTK settings for the DensityFunction algorithm, then ensure that the search completes
   in a reasonable timeframe. By default, the search builds the model using the past
   30 days of data. You can modify the search window to build the model over a longer
   period of time, which may give you better results. You may also want to periodically
-  re-run this search to rebuild the model with the latest data.
-  More information on the algorithm used in the search can be found at `https://docs.splunk.com/Documentation/MLApp/4.2.0/User/Algorithms#DensityFunction`.'
+  re-run this search to rebuild the model with the latest data.\nMore information
+  on the algorithm used in the search can be found at `https://docs.splunk.com/Documentation/MLApp/4.2.0/User/Algorithms#DensityFunction`."
 known_false_positives: none
 references: []
 tags:

@@ -43,15 +41,10 @@ tags:
   - Splunk Enterprise
   - Splunk Enterprise Security
   - Splunk Cloud
-  required_fields:
-  - _time
-  - All_Changes.action
-  - All_Changes.status
-  - All_Changes.object_category
   security_domain: network
 deployment:
   scheduling:
     cron_schedule: 0 2 * * 0
     earliest_time: -90d@d
     latest_time: -1d@d
-    schedule_window: auto
+    schedule_window: auto
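The fitted model above (cloud_excessive_instances_destroyed_v1) would typically be consumed by a companion detection that is not part of this diff. A sketch of that pattern, with simplified feature logic and an assumed action value:

    | tstats count as instances_destroyed from datamodel=Change
        where All_Changes.action=deleted All_Changes.status=success by _time span=1h
    | eval HourOfDay=strftime(_time, "%H")
    | eval isWeekend=if(strftime(_time, "%w") IN ("0","6"), 1, 0)
    | apply cloud_excessive_instances_destroyed_v1 threshold=0.01
    | where 'IsOutlier(instances_destroyed)' = 1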
17 changes: 5 additions & 12 deletions baselines/baseline_of_cloud_instances_launched.yml
@@ -4,8 +4,7 @@ version: 1
 date: '2020-08-14'
 author: David Dorsey, Splunk
 type: Baseline
-datamodel:
-- Change
+status: production
 description: This search is used to build a Machine Learning Toolkit (MLTK) model
   for how many instances are created in the environment. By default, the search uses
   the last 90 days of data to build the model and the model is rebuilt weekly. The

@@ -20,17 +19,16 @@ search: '| tstats count as instances_launched from datamodel=Change where (All_C
   <= 5, 0, 1) | table _time instances_launched, HourOfDay, isWeekend | fit DensityFunction
   instances_launched by "HourOfDay,isWeekend" into cloud_excessive_instances_created_v1
   dist=expon show_density=true'
-how_to_implement: 'You must have Enterprise Security 6.0 or later, if not you will
+how_to_implement: "You must have Enterprise Security 6.0 or later, if not you will
   need to verify that the Machine Learning Toolkit (MLTK) version 4.2 or later is
   installed, along with any required dependencies. Depending on the number of users
   in your environment, you may also need to adjust the value for max_inputs in the
   MLTK settings for the DensityFunction algorithm, then ensure that the search completes
   in a reasonable timeframe. By default, the search builds the model using the past
   90 days of data. You can modify the search window to build the model over a longer
   period of time, which may give you better results. You may also want to periodically
-  re-run this search to rebuild the model with the latest data.
-  More information on the algorithm used in the search can be found at `https://docs.splunk.com/Documentation/MLApp/4.2.0/User/Algorithms#DensityFunction`.'
+  re-run this search to rebuild the model with the latest data.\nMore information
+  on the algorithm used in the search can be found at `https://docs.splunk.com/Documentation/MLApp/4.2.0/User/Algorithms#DensityFunction`."
 known_false_positives: none
 references: []
 tags:

@@ -43,15 +41,10 @@ tags:
   - Splunk Enterprise
   - Splunk Enterprise Security
   - Splunk Cloud
-  required_fields:
-  - _time
-  - All_Changes.action
-  - All_Changes.status
-  - All_Changes.object_category
   security_domain: network
 deployment:
   scheduling:
     cron_schedule: 0 2 * * 0
     earliest_time: -90d@d
     latest_time: -1d@d
-    schedule_window: auto
+    schedule_window: auto
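Before a detection consumes a rebuilt model, the fitted DensityFunction can be inspected with MLTK's summary command; for the model created by the search above:

    | summary cloud_excessive_instances_created_v1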
@@ -4,8 +4,7 @@ version: 1
 date: '2020-09-07'
 author: David Dorsey, Splunk
 type: Baseline
-datamodel:
-- Change
+status: production
 description: This search is used to build a Machine Learning Toolkit (MLTK) model
   for how many API calls for security groups are performed by each user. By default,
   the search uses the last 90 days of data to build the model and the model is rebuilt

@@ -39,15 +38,10 @@ tags:
   - Splunk Enterprise
   - Splunk Enterprise Security
   - Splunk Cloud
-  required_fields:
-  - _time
-  - All_Changes.user
-  - All_Changes.status
-  - All_Changes.object_category
   security_domain: network
 deployment:
   scheduling:
     cron_schedule: 0 2 * * 0
     earliest_time: -90d@d
     latest_time: -1d@d
-    schedule_window: auto
+    schedule_window: auto
11 changes: 3 additions & 8 deletions baselines/baseline_of_command_line_length___mltk.yml
@@ -4,7 +4,7 @@ version: 1
 date: '2019-05-08'
 author: Rico Valdez, Splunk
 type: Baseline
-datamodel: []
+status: production
 description: This search is used to build a Machine Learning Toolkit (MLTK) model
   to characterize the length of the command lines observed for each user in the environment.
   By default, the search uses the last 30 days of data to build the model. The model

@@ -24,7 +24,8 @@ how_to_implement: You must be ingesting endpoint data and populating the Endpoin
   the past 30 days of data. You can modify the search window to build the model over
   a longer period of time, which may give you better results. You may also want to
   periodically re-run this search to rebuild the model with the latest data. More
-  information on the algorithm used in the search can be found at `https://docs.splunk.com/Documentation/MLApp/4.2.0/User/Algorithms#DensityFunction`.
+  information on the algorithm used in the search can be found at
+  `https://docs.splunk.com/Documentation/MLApp/4.2.0/User/Algorithms#DensityFunction`.
 known_false_positives: none
 references: []
 tags:

@@ -41,12 +42,6 @@ tags:
   - Splunk Enterprise
   - Splunk Enterprise Security
   - Splunk Cloud
-  required_fields:
-  - _time
-  - Processes.user
-  - Processes.dest
-  - Processes.process_name
-  - Processes.process
   security_domain: endpoint
 deployment:
   scheduling:
10 changes: 3 additions & 7 deletions baselines/baseline_of_dns_query_length___mltk.yml
@@ -4,8 +4,7 @@ version: 1
 date: '2019-05-08'
 author: Rico Valdez, Splunk
 type: Baseline
-datamodel:
-- Network_Resolution
+status: production
 description: This search is used to build a Machine Learning Toolkit (MLTK) model
   to characterize the length of the DNS queries for each DNS record type observed
   in the environment. By default, the search uses the last 30 days of data to build

@@ -22,7 +21,8 @@ how_to_implement: To successfully implement this search, you will need to ensure
   days of data. You can modify the search window to build the model over a longer
   period of time, which may give you better results. You may also want to periodically
   re-run this search to rebuild the model with the latest data. More information on
-  the algorithm used in the search can be found at `https://docs.splunk.com/Documentation/MLApp/4.2.0/User/Algorithms#DensityFunction`.
+  the algorithm used in the search can be found at
+  `https://docs.splunk.com/Documentation/MLApp/4.2.0/User/Algorithms#DensityFunction`.
 known_false_positives: none
 references: []
 tags:

@@ -36,10 +36,6 @@ tags:
   - Splunk Enterprise
   - Splunk Enterprise Security
   - Splunk Cloud
-  required_fields:
-  - _time
-  - DNS.query
-  - DNS.record_type
   security_domain: network
 deployment:
   scheduling:
62 changes: 33 additions & 29 deletions baselines/baseline_of_kubernetes_container_network_io.yml
@@ -4,29 +4,38 @@ version: 4
 date: '2024-09-24'
 author: Matthew Moore, Splunk
 type: Baseline
-datamodel: []
-description: This baseline rule calculates the average and standard deviation of inbound and outbound network IO for each Kubernetes container.
-  It uses metrics from the Kubernetes API and the Splunk Infrastructure Monitoring Add-on. The rule generates a lookup table with the average and
-  standard deviation of the network IO for each container. This baseline can be used to detect anomalies in network communication behavior,
-  which may indicate security threats such as data exfiltration, command and control communication, or compromised container behavior.
-search: '| mstats avg(k8s.pod.network.io) as io where `kubernetes_metrics` by k8s.cluster.name k8s.pod.name k8s.node.name direction span=10s
-  | eval service = replace(''k8s.pod.name'', "-\w{5}$|-[abcdef0-9]{8,10}-\w{5}$", "")
-  | eval key = ''k8s.cluster.name'' + ":" + ''service''
-  | stats avg(eval(if(direction="transmit", io,null()))) as avg_outbound_network_io avg(eval(if(direction="receive", io,null()))) as avg_inbound_network_io
-  stdev(eval(if(direction="transmit", io,null()))) as stdev_outbound_network_io stdev(eval(if(direction="receive", io,null()))) as stdev_inbound_network_io
-  count latest(_time) as last_seen by key
-  | outputlookup k8s_container_network_io_baseline'
-how_to_implement: 'To implement this detection, follow these steps:
-  1. Deploy the OpenTelemetry Collector (OTEL) to your Kubernetes cluster.
-  2. Enable the hostmetrics/process receiver in the OTEL configuration.
-  3. Ensure that the process metrics, specifically Process.cpu.utilization and process.memory.utilization, are enabled.
-  4. Install the Splunk Infrastructure Monitoring (SIM) add-on (ref: https://splunkbase.splunk.com/app/5247)
-  5. Configure the SIM add-on with your Observability Cloud Organization ID and Access Token.
-  6. Set up the SIM modular input to ingest Process Metrics. Name this input "sim_process_metrics_to_metrics_index".
-  7. In the SIM configuration, set the Organization ID to your Observability Cloud Organization ID.
-  8. Set the Signal Flow Program to the following: data(''process.threads'').publish(label=''A''); data(''process.cpu.utilization'').publish(label=''B''); data(''process.cpu.time'').publish(label=''C''); data(''process.disk.io'').publish(label=''D''); data(''process.memory.usage'').publish(label=''E''); data(''process.memory.virtual'').publish(label=''F''); data(''process.memory.utilization'').publish(label=''G''); data(''process.cpu.utilization'').publish(label=''H''); data(''process.disk.operations'').publish(label=''I''); data(''process.handles'').publish(label=''J''); data(''process.threads'').publish(label=''K'')
-  9. Set the Metric Resolution to 10000.
-  10. Leave all other settings at their default values.'
+status: production
+description: This baseline rule calculates the average and standard deviation of inbound
+  and outbound network IO for each Kubernetes container. It uses metrics from the
+  Kubernetes API and the Splunk Infrastructure Monitoring Add-on. The rule generates
+  a lookup table with the average and standard deviation of the network IO for each
+  container. This baseline can be used to detect anomalies in network communication
+  behavior, which may indicate security threats such as data exfiltration, command
+  and control communication, or compromised container behavior.
+search: "| mstats avg(k8s.pod.network.io) as io where `kubernetes_metrics` by k8s.cluster.name
+  k8s.pod.name k8s.node.name direction span=10s | eval service = replace('k8s.pod.name',
+  \"-\\w{5}$|-[abcdef0-9]{8,10}-\\w{5}$\", \"\") | eval key = 'k8s.cluster.name' +
+  \":\" + 'service' | stats avg(eval(if(direction=\"transmit\", io,null()))) as avg_outbound_network_io
+  avg(eval(if(direction=\"receive\", io,null()))) as avg_inbound_network_io stdev(eval(if(direction=\"\
+  transmit\", io,null()))) as stdev_outbound_network_io stdev(eval(if(direction=\"\
+  receive\", io,null()))) as stdev_inbound_network_io count latest(_time) as last_seen
+  by key | outputlookup k8s_container_network_io_baseline"
+how_to_implement: "To implement this detection, follow these steps: 1. Deploy the
+  OpenTelemetry Collector (OTEL) to your Kubernetes cluster. 2. Enable the hostmetrics/process
+  receiver in the OTEL configuration. 3. Ensure that the process metrics, specifically
+  Process.cpu.utilization and process.memory.utilization, are enabled. 4. Install
+  the Splunk Infrastructure Monitoring (SIM) add-on (ref: https://splunkbase.splunk.com/app/5247)
+  5. Configure the SIM add-on with your Observability Cloud Organization ID and Access
+  Token. 6. Set up the SIM modular input to ingest Process Metrics. Name this input
+  \"sim_process_metrics_to_metrics_index\". 7. In the SIM configuration, set the Organization
+  ID to your Observability Cloud Organization ID. 8. Set the Signal Flow Program to
+  the following: data('process.threads').publish(label='A'); data('process.cpu.utilization').publish(label='B');
+  data('process.cpu.time').publish(label='C'); data('process.disk.io').publish(label='D');
+  data('process.memory.usage').publish(label='E'); data('process.memory.virtual').publish(label='F');
+  data('process.memory.utilization').publish(label='G'); data('process.cpu.utilization').publish(label='H');
+  data('process.disk.operations').publish(label='I'); data('process.handles').publish(label='J');
+  data('process.threads').publish(label='K') 9. Set the Metric Resolution to 10000.
+  10. Leave all other settings at their default values."
 known_false_positives: none
 references: []
 tags:

@@ -38,15 +47,10 @@ tags:
   - Splunk Enterprise
   - Splunk Enterprise Security
   - Splunk Cloud
-  required_fields:
-  - k8s.pod.network.io
-  - k8s.cluster.name
-  - k8s.node.name
-  - k8s.pod.name
   security_domain: network
 deployment:
   scheduling:
     cron_schedule: 0 2 * * 0
     earliest_time: -30d@d
     latest_time: -1d@d
-    schedule_window: auto
+    schedule_window: auto
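A detection built on this baseline would recompute current per-service IO and compare it to the stored statistics. The following is an assumed consumption pattern, not a search from this commit; it reuses the key construction and column names the baseline writes, with an arbitrary four-sigma threshold:

    | mstats avg(k8s.pod.network.io) as io where `kubernetes_metrics`
        by k8s.cluster.name k8s.pod.name direction span=10s
    | eval service = replace('k8s.pod.name', "-\w{5}$|-[abcdef0-9]{8,10}-\w{5}$", "")
    | eval key = 'k8s.cluster.name' + ":" + 'service'
    | lookup k8s_container_network_io_baseline key
    | where direction="transmit" AND io > avg_outbound_network_io + 4 * stdev_outbound_network_io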
[The remaining changed files in this commit are not shown.]