merge branch master into 'PRWLR-6173-false-positive-in-cloud-front-check'
danibarranqueroo committed Feb 5, 2025
2 parents 056a615 + 567c729 commit 49eea88
Showing 55 changed files with 807 additions and 204 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/find-secrets.yml
@@ -11,7 +11,7 @@ jobs:
with:
fetch-depth: 0
- name: TruffleHog OSS
uses: trufflesecurity/trufflehog@v3.88.2
uses: trufflesecurity/trufflehog@v3.88.4
with:
path: ./
base: ${{ github.event.repository.default_branch }}
2 changes: 1 addition & 1 deletion api/pyproject.toml
@@ -8,7 +8,7 @@ description = "Prowler's API (Django/DRF)"
license = "Apache-2.0"
name = "prowler-api"
package-mode = false
version = "1.3.2"
version = "1.4.0"

[tool.poetry.dependencies]
celery = {extras = ["pytest"], version = "^5.4.0"}
@@ -0,0 +1,64 @@
import json
from datetime import datetime, timedelta, timezone

import django.db.models.deletion
from django.db import migrations, models
from django_celery_beat.models import PeriodicTask

from api.db_utils import rls_transaction
from api.models import Scan, StateChoices


def migrate_daily_scheduled_scan_tasks(apps, schema_editor):
for daily_scheduled_scan_task in PeriodicTask.objects.filter(
task="scan-perform-scheduled"
):
task_kwargs = json.loads(daily_scheduled_scan_task.kwargs)
tenant_id = task_kwargs["tenant_id"]
provider_id = task_kwargs["provider_id"]

current_time = datetime.now(timezone.utc)
scheduled_time_today = datetime.combine(
current_time.date(),
daily_scheduled_scan_task.start_time.time(),
tzinfo=timezone.utc,
)

if current_time < scheduled_time_today:
next_scan_date = scheduled_time_today
else:
next_scan_date = scheduled_time_today + timedelta(days=1)

with rls_transaction(tenant_id):
Scan.objects.create(
tenant_id=tenant_id,
name="Daily scheduled scan",
provider_id=provider_id,
trigger=Scan.TriggerChoices.SCHEDULED,
state=StateChoices.SCHEDULED,
scheduled_at=next_scan_date,
scheduler_task_id=daily_scheduled_scan_task.id,
)


class Migration(migrations.Migration):
atomic = False

dependencies = [
("api", "0007_scan_and_scan_summaries_indexes"),
("django_celery_beat", "0019_alter_periodictasks_options"),
]

operations = [
migrations.AddField(
model_name="scan",
name="scheduler_task",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
to="django_celery_beat.periodictask",
),
),
migrations.RunPython(migrate_daily_scheduled_scan_tasks),
]
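A worked example of the next-run calculation performed by migrate_daily_scheduled_scan_tasks above; the timestamps are illustrative and not taken from the diff:

from datetime import datetime, time, timedelta, timezone

# Suppose the existing PeriodicTask starts at 14:00 UTC and the migration runs at 16:30 UTC.
current_time = datetime(2025, 2, 5, 16, 30, tzinfo=timezone.utc)
scheduled_time_today = datetime.combine(current_time.date(), time(14, 0), tzinfo=timezone.utc)

if current_time < scheduled_time_today:
    next_scan_date = scheduled_time_today                      # today's slot has not happened yet
else:
    next_scan_date = scheduled_time_today + timedelta(days=1)  # slot already passed, so schedule tomorrow

assert next_scan_date == datetime(2025, 2, 6, 14, 0, tzinfo=timezone.utc)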
4 changes: 4 additions & 0 deletions api/src/backend/api/models.py
@@ -11,6 +11,7 @@
from django.db import models
from django.db.models import Q
from django.utils.translation import gettext_lazy as _
from django_celery_beat.models import PeriodicTask
from django_celery_results.models import TaskResult
from psqlextra.manager import PostgresManager
from psqlextra.models import PostgresPartitionedModel
@@ -410,6 +411,9 @@ class TriggerChoices(models.TextChoices):
started_at = models.DateTimeField(null=True, blank=True)
completed_at = models.DateTimeField(null=True, blank=True)
next_scan_at = models.DateTimeField(null=True, blank=True)
scheduler_task = models.ForeignKey(
PeriodicTask, on_delete=models.CASCADE, null=True, blank=True
)
# TODO: mutelist foreign key

class Meta(RowLevelSecurityProtectedModel.Meta):
2 changes: 1 addition & 1 deletion api/src/backend/api/specs/v1.yaml
@@ -1,7 +1,7 @@
openapi: 3.0.3
info:
title: Prowler API
version: 1.3.2
version: 1.4.0
description: |-
Prowler API specification.
4 changes: 0 additions & 4 deletions api/src/backend/api/v1/urls.py
@@ -1,4 +1,3 @@
from django.conf import settings
from django.urls import include, path
from drf_spectacular.views import SpectacularRedocView
from rest_framework_nested import routers
@@ -113,6 +112,3 @@
path("schema", SchemaView.as_view(), name="schema"),
path("docs", SpectacularRedocView.as_view(url_name="schema"), name="docs"),
]

if settings.DEBUG:
urlpatterns += [path("silk/", include("silk.urls", namespace="silk"))]
2 changes: 1 addition & 1 deletion api/src/backend/api/v1/views.py
@@ -193,7 +193,7 @@ class SchemaView(SpectacularAPIView):

def get(self, request, *args, **kwargs):
spectacular_settings.TITLE = "Prowler API"
spectacular_settings.VERSION = "1.3.2"
spectacular_settings.VERSION = "1.4.0"
spectacular_settings.DESCRIPTION = (
"Prowler API specification.\n\nThis file is auto-generated."
)
6 changes: 0 additions & 6 deletions api/src/backend/config/django/devel.py
@@ -37,9 +37,3 @@
) + ("api.filters.CustomDjangoFilterBackend",)

SECRETS_ENCRYPTION_KEY = "ZMiYVo7m4Fbe2eXXPyrwxdJss2WSalXSv3xHBcJkPl0="

MIDDLEWARE += [ # noqa: F405
"silk.middleware.SilkyMiddleware",
]

INSTALLED_APPS += ["silk"] # noqa: F405
55 changes: 36 additions & 19 deletions api/src/backend/tasks/beat.py
@@ -5,10 +5,14 @@
from rest_framework_json_api.serializers import ValidationError
from tasks.tasks import perform_scheduled_scan_task

from api.models import Provider
from api.db_utils import rls_transaction
from api.models import Provider, Scan, StateChoices


def schedule_provider_scan(provider_instance: Provider):
tenant_id = str(provider_instance.tenant_id)
provider_id = str(provider_instance.id)

schedule, _ = IntervalSchedule.objects.get_or_create(
every=24,
period=IntervalSchedule.HOURS,
@@ -17,23 +21,9 @@ def schedule_provider_scan(provider_instance: Provider):
# Create a unique name for the periodic task
task_name = f"scan-perform-scheduled-{provider_instance.id}"

# Schedule the task
_, created = PeriodicTask.objects.get_or_create(
interval=schedule,
name=task_name,
task="scan-perform-scheduled",
kwargs=json.dumps(
{
"tenant_id": str(provider_instance.tenant_id),
"provider_id": str(provider_instance.id),
}
),
one_off=False,
defaults={
"start_time": datetime.now(timezone.utc) + timedelta(hours=24),
},
)
if not created:
if PeriodicTask.objects.filter(
interval=schedule, name=task_name, task="scan-perform-scheduled"
).exists():
raise ValidationError(
[
{
@@ -45,9 +35,36 @@
]
)

with rls_transaction(tenant_id):
scheduled_scan = Scan.objects.create(
tenant_id=tenant_id,
name="Daily scheduled scan",
provider_id=provider_id,
trigger=Scan.TriggerChoices.SCHEDULED,
state=StateChoices.AVAILABLE,
scheduled_at=datetime.now(timezone.utc),
)

# Schedule the task
periodic_task_instance = PeriodicTask.objects.create(
interval=schedule,
name=task_name,
task="scan-perform-scheduled",
kwargs=json.dumps(
{
"tenant_id": tenant_id,
"provider_id": provider_id,
}
),
one_off=False,
start_time=datetime.now(timezone.utc) + timedelta(hours=24),
)
scheduled_scan.scheduler_task_id = periodic_task_instance.id
scheduled_scan.save()

return perform_scheduled_scan_task.apply_async(
kwargs={
"tenant_id": str(provider_instance.tenant_id),
"provider_id": str(provider_instance.id),
"provider_id": provider_id,
},
)
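A hedged usage sketch of the reworked schedule_provider_scan: only the function and the models come from this diff, while the call site and the provider primary key below are illustrative.

# Illustrative call site, e.g. right after a provider has been created.
from tasks.beat import schedule_provider_scan

from api.models import Provider

provider = Provider.objects.get(pk="provider-uuid")  # hypothetical primary key
async_result = schedule_provider_scan(provider)
# Per the diff, this call:
#   * raises ValidationError if a "scan-perform-scheduled-<provider_id>" PeriodicTask already exists,
#   * creates an AVAILABLE "Daily scheduled scan" Scan row inside the tenant's RLS transaction,
#   * creates the daily PeriodicTask (first beat execution 24 hours from now) and links it via scheduler_task_id,
#   * enqueues perform_scheduled_scan_task immediately and returns its AsyncResult.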
7 changes: 5 additions & 2 deletions api/src/backend/tasks/jobs/scan.py
@@ -245,8 +245,11 @@ def perform_prowler_scan(

status = FindingStatus[finding.status]
delta = _create_finding_delta(last_status, status)
# For the findings prior to the change, when a first finding is found with delta!="new" it will be assigned a current date as first_seen_at and the successive findings with the same UID will always get the date of the previous finding.
# For new findings, when a finding (delta="new") is found for the first time, the first_seen_at attribute will be assigned the current date, the following findings will get that date.
# For the findings prior to the change, when a first finding is found with delta!="new" it will be
# assigned a current date as first_seen_at and the successive findings with the same UID will
# always get the date of the previous finding.
# For new findings, when a finding (delta="new") is found for the first time, the first_seen_at
# attribute will be assigned the current date, the following findings will get that date.
if not last_first_seen_at:
last_first_seen_at = datetime.now(tz=timezone.utc)

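A minimal, runnable illustration of the first_seen_at rule described in the comments above; the helper name and the values are illustrative, not part of the codebase.

from datetime import datetime, timezone

def resolve_first_seen_at(last_first_seen_at):
    # Keep the timestamp carried over from the previous finding with the same UID;
    # only a finding seen for the first time is stamped with the current date.
    return last_first_seen_at or datetime.now(tz=timezone.utc)

first = resolve_first_seen_at(None)    # first occurrence: gets "now"
second = resolve_first_seen_at(first)  # later occurrence with the same UID: keeps the original date
assert first == second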
49 changes: 31 additions & 18 deletions api/src/backend/tasks/tasks.py
@@ -1,15 +1,14 @@
from datetime import datetime, timedelta, timezone

from celery import shared_task
from config.celery import RLSTask
from django_celery_beat.models import PeriodicTask
from tasks.jobs.connection import check_provider_connection
from tasks.jobs.deletion import delete_provider, delete_tenant
from tasks.jobs.scan import aggregate_findings, perform_prowler_scan
from tasks.utils import get_next_execution_datetime

from api.db_utils import rls_transaction
from api.decorators import set_tenant
from api.models import Provider, Scan
from api.models import Scan, StateChoices


@shared_task(base=RLSTask, name="provider-connection-check")
@@ -100,28 +99,42 @@ def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str):
task_id = self.request.id

with rls_transaction(tenant_id):
provider_instance = Provider.objects.get(pk=provider_id)
periodic_task_instance = PeriodicTask.objects.get(
name=f"scan-perform-scheduled-{provider_id}"
)
next_scan_date = datetime.combine(
datetime.now(timezone.utc), periodic_task_instance.start_time.time()
) + timedelta(hours=24)

scan_instance = Scan.objects.create(
next_scan_datetime = get_next_execution_datetime(task_id, provider_id)
scan_instance, _ = Scan.objects.get_or_create(
tenant_id=tenant_id,
name="Daily scheduled scan",
provider=provider_instance,
provider_id=provider_id,
trigger=Scan.TriggerChoices.SCHEDULED,
next_scan_at=next_scan_date,
task_id=task_id,
state__in=(StateChoices.SCHEDULED, StateChoices.AVAILABLE),
scheduler_task_id=periodic_task_instance.id,
defaults={"state": StateChoices.SCHEDULED},
)

result = perform_prowler_scan(
tenant_id=tenant_id,
scan_id=str(scan_instance.id),
provider_id=provider_id,
)
scan_instance.task_id = task_id
scan_instance.save()

try:
result = perform_prowler_scan(
tenant_id=tenant_id,
scan_id=str(scan_instance.id),
provider_id=provider_id,
)
except Exception as e:
raise e
finally:
with rls_transaction(tenant_id):
Scan.objects.get_or_create(
tenant_id=tenant_id,
name="Daily scheduled scan",
provider_id=provider_id,
trigger=Scan.TriggerChoices.SCHEDULED,
state=StateChoices.SCHEDULED,
scheduled_at=next_scan_datetime,
scheduler_task_id=periodic_task_instance.id,
)

perform_scan_summary_task.apply_async(
kwargs={
"tenant_id": tenant_id,
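One detail behind the get_or_create calls above: Django uses every kwarg for the lookup but, when it has to create a row, drops lookup-style kwargs (those containing "__") and applies defaults, so a missing scan is created in the SCHEDULED state. A simplified, Django-free sketch of that kwarg splitting, with illustrative values:

LOOKUP_SEP = "__"

def split_get_or_create_kwargs(defaults=None, **kwargs):
    lookup = dict(kwargs)  # every kwarg filters the SELECT
    create_params = {k: v for k, v in kwargs.items() if LOOKUP_SEP not in k}
    create_params.update(defaults or {})  # defaults are applied only on INSERT
    return lookup, create_params

lookup, create_params = split_get_or_create_kwargs(
    defaults={"state": "SCHEDULED"},
    tenant_id="tenant-uuid",       # illustrative values, not from the diff
    provider_id="provider-uuid",
    trigger="scheduled",
    state__in=("SCHEDULED", "AVAILABLE"),
    scheduler_task_id=42,
)
assert "state__in" not in create_params and create_params["state"] == "SCHEDULED"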
4 changes: 4 additions & 0 deletions api/src/backend/tasks/tests/test_beat.py
@@ -6,6 +6,8 @@
from rest_framework_json_api.serializers import ValidationError
from tasks.beat import schedule_provider_scan

from api.models import Scan


@pytest.mark.django_db
class TestScheduleProviderScan:
@@ -15,9 +17,11 @@ def test_schedule_provider_scan_success(self, providers_fixture):
with patch(
"tasks.tasks.perform_scheduled_scan_task.apply_async"
) as mock_apply_async:
assert Scan.all_objects.count() == 0
result = schedule_provider_scan(provider_instance)

assert result is not None
assert Scan.all_objects.count() == 1

mock_apply_async.assert_called_once_with(
kwargs={