Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions dojo/db_migrations/0276_finding_lifecycle_event.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Generated by Django 5.2.14 on 2026-07-04 03:26

import django.db.models.deletion
import django.utils.timezone
from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the ``Finding_Lifecycle_Event`` table together with its two indexes."""

    dependencies = [
        ('dojo', '0275_usercontactinfo_user_state_details'),
    ]

    operations = [
        migrations.CreateModel(
            name='Finding_Lifecycle_Event',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                (
                    'actor_type',
                    models.CharField(
                        choices=[
                            ('import', 'Import pipeline'),
                            ('dedupe', 'Deduplication'),
                            ('jira', 'JIRA sync'),
                            ('system', 'System'),
                        ],
                        default='system',
                        editable=False,
                        help_text='Which part of the platform produced this event.',
                        max_length=20,
                        verbose_name='Actor Type',
                    ),
                ),
                (
                    'action',
                    models.CharField(
                        choices=[
                            ('created', 'Created'),
                            ('closed', 'Closed'),
                            ('reopened', 'Reopened'),
                            ('marked_duplicate', 'Marked duplicate'),
                            ('pushed_jira', 'Pushed to JIRA'),
                        ],
                        editable=False,
                        help_text='The lifecycle transition that happened to the finding.',
                        max_length=20,
                        verbose_name='Action',
                    ),
                ),
                (
                    'detail',
                    models.JSONField(
                        blank=True,
                        default=dict,
                        editable=False,
                        help_text='Context for the transition: import/test ids, close reason, duplicate original, JIRA key.',
                        verbose_name='Detail',
                    ),
                ),
                (
                    'created',
                    models.DateTimeField(
                        default=django.utils.timezone.now,
                        editable=False,
                        verbose_name='Created',
                    ),
                ),
                (
                    # db_constraint=False: no database-level foreign key
                    # constraint is created for this column.
                    'finding',
                    models.ForeignKey(
                        db_constraint=False,
                        editable=False,
                        on_delete=django.db.models.deletion.DO_NOTHING,
                        related_name='lifecycle_events',
                        to='dojo.finding',
                        verbose_name='Finding',
                    ),
                ),
            ],
            options={
                'indexes': [
                    # composite index: events of one finding, by creation time
                    models.Index(fields=['finding', 'created'], name='finding_lifecycle_f_created'),
                    # plain index on creation time
                    models.Index(fields=['created'], name='finding_lifecycle_created'),
                ],
            },
        ),
    ]
7 changes: 7 additions & 0 deletions dojo/finding/api/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
Engagement,
Finding,
Finding_Group,
Finding_Lifecycle_Event,
Finding_Template,
Note_Type,
Product,
Expand Down Expand Up @@ -148,6 +149,12 @@ def to_representation(self, value):
return value


class FindingLifecycleEventSerializer(serializers.ModelSerializer):

    """
    Serialize a ``Finding_Lifecycle_Event`` row.

    Only the event's own columns are emitted; the ``finding`` foreign key is
    not part of the output.
    """

    class Meta:
        model = Finding_Lifecycle_Event
        fields = (
            "id",
            "action",
            "actor_type",
            "detail",
            "created",
        )


class BurpRawRequestResponseSerializer(serializers.Serializer):
req_resp = RequestResponseSerializerField(required=True)

Expand Down
21 changes: 21 additions & 0 deletions dojo/finding/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
BurpRawRequestResponseSerializer,
FindingCloseSerializer,
FindingCreateSerializer,
FindingLifecycleEventSerializer,
FindingMetaSerializer,
FindingNoteSerializer,
FindingSerializer,
Expand All @@ -60,6 +61,7 @@
DojoMeta,
FileUpload,
Finding,
Finding_Lifecycle_Event,
Finding_Template,
NoteHistory,
Notes,
Expand Down Expand Up @@ -293,6 +295,25 @@ def verify(self, request, pk=None):
request=api_v2_serializers.TagSerializer,
responses={status.HTTP_201_CREATED: api_v2_serializers.TagSerializer},
)
# NOTE(review): in this change the lines directly above this decorator are the
# tail of another @extend_schema(...) (request/responses using TagSerializer),
# and the lines directly below this method are the @action/def of `tags`.
# If that ordering is what lands in the file, those TagSerializer schema
# decorators stack onto this action and `tags` loses them - confirm, and move
# this action above those decorators or below the `tags` method.
@extend_schema(
    methods=["GET"],
    responses={status.HTTP_200_OK: FindingLifecycleEventSerializer(many=True)},
)
@action(
    detail=True,
    methods=["get"],
    url_path="lifecycle_events",
    permission_classes=(IsAuthenticated, permissions.UserHasFindingRelatedObjectPermission),
)
def lifecycle_events(self, request, pk=None):
    """
    The finding's provenance timeline: created by which import, closed why,
    marked duplicate of what, pushed to JIRA as which key.
    """
    # Resolve the finding through the viewset first; events are then looked
    # up by its id.
    finding = self.get_object()
    timeline = Finding_Lifecycle_Event.objects.filter(finding_id=finding.id)
    # Newest first, with id as the tie-breaker; the response is capped at
    # 500 events.
    newest_first = timeline.order_by("-created", "-id")[:500]
    payload = FindingLifecycleEventSerializer(newest_first, many=True).data
    return Response(payload)

@action(detail=True, methods=["get", "post"], permission_classes=(IsAuthenticated, permissions.UserHasFindingRelatedObjectPermission))
def tags(self, request, pk=None):
finding = self.get_object()
Expand Down
13 changes: 12 additions & 1 deletion dojo/finding/deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from django.db.models.query_utils import Q

from dojo.celery import app
from dojo.models import Endpoint_Status, Finding, System_Settings
from dojo.finding.lifecycle import record_lifecycle_event
from dojo.models import Endpoint_Status, Finding, Finding_Lifecycle_Event, System_Settings

logger = logging.getLogger(__name__)
deduplicationLogger = logging.getLogger("dojo.specific-loggers.deduplication")
Expand Down Expand Up @@ -192,6 +193,16 @@ def set_duplicate(new_finding, existing_finding, *, save=True):
logger.debug("saving existing finding: %d", existing_finding.id)
super(Finding, existing_finding).save(skip_validation=True)

# Provenance: record the dedupe decision with enough context to answer
# "why is this a duplicate?" (transitively re-pointed findings record
# their own event through the recursive call above)
record_lifecycle_event(
new_finding.id,
Finding_Lifecycle_Event.Action.MARKED_DUPLICATE,
{"original_id": existing_finding.id, "hash_code": new_finding.hash_code},
actor_type=Finding_Lifecycle_Event.ActorType.DEDUPE,
)

return all_modified


Expand Down
86 changes: 86 additions & 0 deletions dojo/finding/lifecycle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""
Finding lifecycle provenance.

Records SEMANTIC events on findings — created by import X, closed because it
was gone from a re-upload, marked duplicate of Y, pushed to JIRA as KEY —
the "why", which neither field-level history (pghistory triggers) nor the
per-import action records (Test_Import_Finding_Action) can express.

Write discipline (this table must never become a performance problem):
- Transition-only: no rows for "matched, nothing changed" reimports.
- Batched where findings are processed in batches: importers collect CREATED
  events and bulk_create them per finding batch. Close, reopen and
  mark-duplicate transitions are written one event at a time through
  record_lifecycle_event.
- No signals; string detail values are truncated.
- The FK carries no database constraint and on_delete=DO_NOTHING, so bulk
  finding deletion never walks this table; orphaned events are removed by
  the retention purge once they age past the retention window.
"""
import logging
from datetime import timedelta

from django.conf import settings
from django.utils import timezone

from dojo.celery import app
from dojo.models import Finding_Lifecycle_Event

logger = logging.getLogger(__name__)

_DETAIL_MAX_CHARS = 256
_PURGE_BATCH_SIZE = 10000


def lifecycle_events_enabled() -> bool:
    """Return the ``FINDING_LIFECYCLE_EVENTS_ENABLED`` setting; ``True`` when it is not set."""
    enabled = getattr(settings, "FINDING_LIFECYCLE_EVENTS_ENABLED", True)
    return enabled


def _truncate(value):
if isinstance(value, str) and len(value) > _DETAIL_MAX_CHARS:
return value[: _DETAIL_MAX_CHARS - 1] + "…"
return value


def lifecycle_event(finding_id, action, detail=None, actor_type=Finding_Lifecycle_Event.ActorType.IMPORT):
    """
    Build an unsaved ``Finding_Lifecycle_Event``; persist via record_lifecycle_events.

    ``detail`` entries whose value is ``None`` are dropped, and the remaining
    values are passed through ``_truncate``. A missing or empty ``detail``
    yields an empty dict.
    """
    cleaned = {}
    for key, value in (detail or {}).items():
        if value is None:
            continue
        cleaned[key] = _truncate(value)
    return Finding_Lifecycle_Event(
        finding_id=finding_id,
        action=action,
        actor_type=actor_type,
        detail=cleaned,
    )


def record_lifecycle_events(events) -> None:
    """
    Persist the given events with a single ``bulk_create`` call (batch size 1000).

    Returns without touching the database when ``events`` is empty/falsy or
    when lifecycle events are disabled.
    """
    # Emptiness is checked first, so an empty batch never reads the setting.
    if not events:
        return
    if not lifecycle_events_enabled():
        return
    Finding_Lifecycle_Event.objects.bulk_create(events, batch_size=1000)


def record_lifecycle_event(finding_id, action, detail=None, actor_type=Finding_Lifecycle_Event.ActorType.IMPORT) -> None:
    """
    Build and persist one lifecycle event.

    Does nothing when lifecycle events are disabled; the check happens before
    the event object is built.
    """
    if lifecycle_events_enabled():
        single = lifecycle_event(finding_id, action, detail, actor_type)
        record_lifecycle_events([single])


@app.task
def purge_finding_lifecycle_events(*args, **kwargs):
    """
    Delete lifecycle events older than the retention window, in batches.

    The window is ``FINDING_LIFECYCLE_EVENTS_RETENTION_DAYS`` from settings
    (540 days when unset). Rows are selected purely by ``created``; events
    whose finding has been deleted are not searched for separately and go
    away through this same age filter.

    Returns the total number of rows reported deleted.
    """
    retention_days = getattr(settings, "FINDING_LIFECYCLE_EVENTS_RETENTION_DAYS", 540)
    cutoff = timezone.now() - timedelta(days=retention_days)
    # Lazy queryset: every slice below issues a fresh query, so each pass
    # sees only the rows that are still present.
    expired = Finding_Lifecycle_Event.objects.filter(created__lt=cutoff)
    purged = 0
    # Fetch at most _PURGE_BATCH_SIZE ids per pass and delete exactly those,
    # until no expired row is left.
    while stale_ids := list(expired.values_list("id", flat=True)[:_PURGE_BATCH_SIZE]):
        removed, _per_model = Finding_Lifecycle_Event.objects.filter(id__in=stale_ids).delete()
        purged += removed
    if purged:
        logger.info("purged %d finding lifecycle events older than %d days", purged, retention_days)
    return purged
71 changes: 71 additions & 0 deletions dojo/finding/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1555,6 +1555,77 @@ class CWE(models.Model):
number = models.IntegerField()


class Finding_Lifecycle_Event(models.Model):

    """
    One semantic transition in a finding's life: created by an import,
    closed, reopened, marked as a duplicate, or pushed to JIRA.

    Each row stores which part of the platform acted (``actor_type``), what
    happened (``action``), free-form JSON context (``detail``) and when
    (``created``). No field is editable.

    The ``finding`` foreign key has no database constraint and uses
    ``on_delete=DO_NOTHING``, so deleting findings does not cascade into this
    table. Events are written through the helpers in dojo/finding/lifecycle.py,
    which also holds the retention purge task.
    """

    class ActorType(models.TextChoices):
        IMPORT = "import", _("Import pipeline")
        DEDUPE = "dedupe", _("Deduplication")
        JIRA = "jira", _("JIRA sync")
        SYSTEM = "system", _("System")

    class Action(models.TextChoices):
        CREATED = "created", _("Created")
        CLOSED = "closed", _("Closed")
        REOPENED = "reopened", _("Reopened")
        MARKED_DUPLICATE = "marked_duplicate", _("Marked duplicate")
        PUSHED_JIRA = "pushed_jira", _("Pushed to JIRA")

    # Unconstrained link to the finding: no DB-level FK, nothing happens on delete.
    finding = models.ForeignKey(
        "dojo.Finding",
        related_name="lifecycle_events",
        verbose_name=_("Finding"),
        editable=False,
        db_constraint=False,
        on_delete=models.DO_NOTHING,
    )
    actor_type = models.CharField(
        verbose_name=_("Actor Type"),
        help_text=_("Which part of the platform produced this event."),
        choices=ActorType.choices,
        default=ActorType.SYSTEM,
        max_length=20,
        editable=False,
    )
    action = models.CharField(
        verbose_name=_("Action"),
        help_text=_("The lifecycle transition that happened to the finding."),
        choices=Action.choices,
        max_length=20,
        editable=False,
    )
    detail = models.JSONField(
        verbose_name=_("Detail"),
        help_text=_("Context for the transition: import/test ids, close reason, duplicate original, JIRA key."),
        default=dict,
        blank=True,
        editable=False,
    )
    created = models.DateTimeField(
        verbose_name=_("Created"),
        default=timezone.now,
        editable=False,
    )

    class Meta:
        indexes = [
            # timeline lookup: all events of one finding, ordered by time
            models.Index(fields=["finding", "created"], name="finding_lifecycle_f_created"),
            # age-based scan used by the retention purge
            models.Index(fields=["created"], name="finding_lifecycle_created"),
        ]

    def __str__(self):
        action, finding_id, actor = self.action, self.finding_id, self.actor_type
        return f"{action} finding {finding_id} ({actor})"


class BurpRawRequestResponse(models.Model):
finding = models.ForeignKey("dojo.Finding", blank=True, null=True, on_delete=models.CASCADE)
burpRequestBase64 = models.BinaryField()
Expand Down
8 changes: 8 additions & 0 deletions dojo/importers/base_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import dojo.finding.helper as finding_helper
import dojo.risk_acceptance.helper as ra_helper
from dojo.finding.lifecycle import record_lifecycle_event
from dojo.importers.options import ImporterOptions
from dojo.jira.services import is_keep_in_sync
from dojo.location.models import Location
Expand All @@ -27,6 +28,7 @@
Endpoint,
FileUpload,
Finding,
Finding_Lifecycle_Event,
Test,
Test_Import,
Test_Import_Finding_Action,
Expand Down Expand Up @@ -860,6 +862,12 @@ def mitigate_finding(
finding.save(dedupe_option=False, product_grading_option=product_grading_option)
else:
finding.save(dedupe_option=False, push_to_jira=(self.push_to_jira or is_keep_in_sync(finding, prefetched_jira_instance=self.jira_instance)), product_grading_option=product_grading_option)
# Provenance: record WHY the finding closed (close_old_findings / re-upload)
record_lifecycle_event(
finding.id,
Finding_Lifecycle_Event.Action.CLOSED,
{"test_id": self.test.id, "scan_type": self.scan_type, "reason": note_message},
)

def notify_scan_added(
self,
Expand Down
11 changes: 11 additions & 0 deletions dojo/importers/default_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@

from dojo.celery_dispatch import dojo_dispatch_task
from dojo.finding import helper as finding_helper
from dojo.finding.lifecycle import lifecycle_event, record_lifecycle_events
from dojo.importers.base_importer import BaseImporter, Parser
from dojo.importers.base_location_manager import LocationHandler
from dojo.importers.options import ImporterOptions
from dojo.jira import services as jira_services
from dojo.models import (
Engagement,
Finding,
Finding_Lifecycle_Event,
Test,
Test_Import,
)
Expand Down Expand Up @@ -287,6 +289,15 @@ def _process_findings_internal(
# their endpoints/locations) BEFORE post_process_findings_batch
# dispatches, so rules/dedup see inherited tags on .tags.
apply_inherited_tags_for_findings(batch_findings)
# Provenance: one CREATED lifecycle event per new finding, bulk-written per batch
record_lifecycle_events([
lifecycle_event(
f.id,
Finding_Lifecycle_Event.Action.CREATED,
{"test_id": self.test.id, "scan_type": self.scan_type, "kind": "import"},
)
for f in batch_findings
])
batch_findings.clear()
finding_ids_batch = list(batch_finding_ids)
batch_finding_ids.clear()
Expand Down
17 changes: 17 additions & 0 deletions dojo/importers/default_reimporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@
find_candidates_for_deduplication_unique_id,
find_candidates_for_reimport_legacy,
)
from dojo.finding.lifecycle import lifecycle_event, record_lifecycle_event, record_lifecycle_events
from dojo.importers.base_importer import BaseImporter, Parser
from dojo.importers.base_location_manager import LocationHandler
from dojo.importers.options import ImporterOptions
from dojo.jira import services as jira_services
from dojo.models import (
Development_Environment,
Finding,
Finding_Lifecycle_Event,
Notes,
Test,
Test_Import,
Expand Down Expand Up @@ -454,6 +456,16 @@ def _process_findings_internal(
# their original creation; re-running it on no-change reimports
# would be ~8 wasted queries per batch.
apply_inherited_tags_for_findings(new_findings_in_batch)
# Provenance: CREATED events only for findings this reimport actually
# created — matched/unchanged findings intentionally produce no rows
record_lifecycle_events([
lifecycle_event(
f.id,
Finding_Lifecycle_Event.Action.CREATED,
{"test_id": self.test.id, "scan_type": self.scan_type, "kind": "reimport"},
)
for f in new_findings_in_batch
])
new_findings_in_batch.clear()
batch_findings.clear()
finding_ids_batch = list(batch_finding_ids)
Expand Down Expand Up @@ -840,6 +852,11 @@ def process_matched_mitigated_finding(
self.location_handler.record_reactivations_for_finding(existing_finding)
existing_finding.notes.add(note)
self.reactivated_items.append(existing_finding)
record_lifecycle_event(
existing_finding.id,
Finding_Lifecycle_Event.Action.REOPENED,
{"test_id": self.test.id, "scan_type": self.scan_type, "reason": note_entry},
)
# The new finding is active while the existing one is mitigated. The existing finding needs to
# be updated in some way
# Return False here to make sure further processing happens
Expand Down
Loading
Loading