Skip to content

Commit

Permalink
Merge pull request #2633 from cta-observatory/write_shower_hist_hdf5
Browse files Browse the repository at this point in the history
Refactor how shower distributions are written, write them for all EventSources, implement for HDF5EventSource
  • Loading branch information
maxnoe authored Nov 12, 2024
2 parents 63295fa + e0e0bc9 commit ccfc288
Show file tree
Hide file tree
Showing 11 changed files with 118 additions and 76 deletions.
7 changes: 7 additions & 0 deletions docs/changes/2633.api.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Move the simulated shower distribution from something
that was specific to ``SimTelEventSource`` to a general interface
of ``EventSource``. Implement the new interface in both ``SimTelEventSource``
and ``HDF5EventSource`` and adapt writing of this information in ``DataWriter``.

This makes sure that the ``SimulatedShowerDistribution`` information is always
included, also when running ``ctapipe-process`` consecutively.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ filterwarnings = [
"error::ctapipe.utils.deprecation.CTAPipeDeprecationWarning",
"error::ctapipe.instrument.FromNameWarning",
"ignore:`np.MachAr` is deprecated:DeprecationWarning",
"ignore::ctapipe.core.provenance.MissingReferenceMetadata",
]
norecursedirs = [
".git",
Expand Down
64 changes: 6 additions & 58 deletions src/ctapipe/io/datawriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,10 @@

import numpy as np
import tables
from astropy import units as u
from traitlets import Dict, Instance

from ..containers import (
ArrayEventContainer,
SimulatedShowerDistribution,
TelescopeConfigurationIndexContainer,
TelEventIndexContainer,
)
Expand All @@ -26,7 +24,6 @@
from .datalevels import DataLevel
from .eventsource import EventSource
from .hdf5tableio import HDF5TableWriter
from .simteleventsource import SimTelEventSource
from .tableio import FixedPointColumnTransform, TelListToMaskTransform

__all__ = ["DataWriter", "DATA_MODEL_VERSION", "write_reference_metadata_headers"]
Expand Down Expand Up @@ -563,66 +560,17 @@ class ExtraSimInfo(Container):

self._writer.write("configuration/simulation/run", [extramc, config])

def write_simulation_histograms(self, event_source):
"""Write the distribution of thrown showers
Notes
-----
- this only runs if this is a simulation file. The current
implementation is a bit of a hack and implies we should improve
SimTelEventSource to read this info.
- Currently the histograms are at the end of the simtel file, so if
max_events is set to non-zero, the end of the file may not be read,
and this no histograms will be found.
"""
if not self._is_simulation:
self.log.debug("Not writing simulation histograms for observed data")
return

if not isinstance(event_source, SimTelEventSource):
self.log.debug("Not writing simulation for non-SimTelEventSource")
return
def write_simulated_shower_distributions(self, distributions):
"""Write the distribution of thrown showers."""

self.log.debug("Writing simulation histograms")

def fill_from_simtel(
obs_id, eventio_hist, container: SimulatedShowerDistribution
):
"""fill from a SimTel Histogram entry"""
container.obs_id = obs_id
container.hist_id = eventio_hist["id"]
container.n_entries = eventio_hist["entries"]
xbins = np.linspace(
eventio_hist["lower_x"],
eventio_hist["upper_x"],
eventio_hist["n_bins_x"] + 1,
)
ybins = np.linspace(
eventio_hist["lower_y"],
eventio_hist["upper_y"],
eventio_hist["n_bins_y"] + 1,
for container in distributions.values():
self._writer.write(
table_name="simulation/service/shower_distribution",
containers=container,
)

container.bins_core_dist = xbins * u.m
container.bins_energy = 10**ybins * u.TeV
container.histogram = eventio_hist["data"]
container.meta["hist_title"] = eventio_hist["title"]
container.meta["x_label"] = "Log10 E (TeV)"
container.meta["y_label"] = "3D Core Distance (m)"

hists = event_source.file_.histograms
if hists is not None:
hist_container = SimulatedShowerDistribution()
hist_container.prefix = ""
for hist in hists:
if hist["id"] == 6:
fill_from_simtel(self.event_source.obs_ids[0], hist, hist_container)
self._writer.write(
table_name="simulation/service/shower_distribution",
containers=hist_container,
)

def table_name(self, tel_id):
"""construct dataset table names depending on chosen split method"""
return f"tel_{tel_id:03d}"
Expand Down
14 changes: 13 additions & 1 deletion src/ctapipe/io/eventsource.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
ArrayEventContainer,
ObservationBlockContainer,
SchedulingBlockContainer,
SimulatedShowerDistribution,
SimulationConfigContainer,
)
from ..core import Provenance, ToolConfigurationError
Expand Down Expand Up @@ -288,7 +289,7 @@ def obs_ids(self) -> list[int]:
return list(self.observation_blocks.keys())

@property
def atmosphere_density_profile(self) -> AtmosphereDensityProfile:
def atmosphere_density_profile(self) -> AtmosphereDensityProfile | None:
"""atmosphere density profile that can be integrated to
convert between h_max and X_max. This should correspond
either to what was used in a simulation, or a measurement
Expand All @@ -301,6 +302,17 @@ def atmosphere_density_profile(self) -> AtmosphereDensityProfile:
"""
return None

@property
def simulated_shower_distributions(self) -> dict[int, SimulatedShowerDistribution]:
"""
The distribution of simulated showers for each obs_id.
Returns
-------
dict[int,ctapipe.containers.SimulatedShowerDistribution]
"""
return {}

@abstractmethod
def _generator(self) -> Generator[ArrayEventContainer, None, None]:
"""
Expand Down
19 changes: 19 additions & 0 deletions src/ctapipe/io/hdf5eventsource.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
SchedulingBlockContainer,
SimulatedEventContainer,
SimulatedShowerContainer,
SimulatedShowerDistribution,
SimulationConfigContainer,
TelescopeImpactParameterContainer,
TelescopePointingContainer,
Expand Down Expand Up @@ -231,6 +232,24 @@ def __init__(self, input_url=None, config=None, parent=None, **kwargs):
table.add_index("obs_id")
self._constant_telescope_pointing[tel_id] = table

self._simulated_shower_distributions = (
self._read_simulated_shower_distributions()
)

def _read_simulated_shower_distributions(self):
key = "/simulation/service/shower_distribution"
if key not in self.file_.root:
return {}

reader = HDF5TableReader(self.file_).read(
key, containers=SimulatedShowerDistribution
)
return {dist.obs_id: dist for dist in reader}

@property
def simulated_shower_distributions(self):
return self._simulated_shower_distributions

def __exit__(self, exc_type, exc_val, exc_tb):
self.close()

Expand Down
35 changes: 35 additions & 0 deletions src/ctapipe/io/simteleventsource.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
SimulatedCameraContainer,
SimulatedEventContainer,
SimulatedShowerContainer,
SimulatedShowerDistribution,
SimulationConfigContainer,
TelescopeImpactParameterContainer,
TelescopePointingContainer,
Expand Down Expand Up @@ -745,6 +746,40 @@ def is_compatible(file_path):
def subarray(self):
return self._subarray_info

@property
def simulated_shower_distributions(self) -> dict[int, SimulatedShowerDistribution]:
if self.file_.histograms is None:
warnings.warn(
"eventio file has no histograms (yet)."
" Histograms are stored at the end of the file and can only be read after all events have been consumed."
" Setting max-events will also prevent the histograms from being available."
)
return {}

shower_hist = next(hist for hist in self.file_.histograms if hist["id"] == 6)

xbins = np.linspace(
shower_hist["lower_x"],
shower_hist["upper_x"],
shower_hist["n_bins_x"] + 1,
)
ybins = np.linspace(
shower_hist["lower_y"],
shower_hist["upper_y"],
shower_hist["n_bins_y"] + 1,
)

container = SimulatedShowerDistribution(
obs_id=self.obs_id,
hist_id=shower_hist["id"],
n_entries=shower_hist["entries"],
bins_core_dist=u.Quantity(xbins, u.m, copy=False),
bins_energy=u.Quantity(10**ybins, u.TeV, copy=False),
histogram=shower_hist["data"],
)

return {self.obs_id: container}

def _generator(self):
if self.file_.tell() > self.start_pos:
self.file_._next_header_pos = 0
Expand Down
12 changes: 9 additions & 3 deletions src/ctapipe/io/tests/test_datawriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ def test_write(tmpdir: Path):
calibrate(event)
generate_dummy_dl2(event)
writer(event)
writer.write_simulation_histograms(source)
writer.write_simulated_shower_distributions(
source.simulated_shower_distributions
)

assert output_path.exists()

Expand Down Expand Up @@ -174,7 +176,9 @@ def test_roundtrip(tmpdir: Path):
generate_dummy_dl2(event)
write(event)
events.append(deepcopy(event))
write.write_simulation_histograms(source)
write.write_simulated_shower_distributions(
source.simulated_shower_distributions
)
assert DataLevel.DL1_IMAGES in write.datalevels
assert DataLevel.DL1_PARAMETERS not in write.datalevels
assert DataLevel.DL2 in write.datalevels
Expand Down Expand Up @@ -233,7 +237,9 @@ def test_dl1writer_no_events(tmpdir: Path):
write_dl1_images=True,
) as writer:
writer.log.level = logging.DEBUG
writer.write_simulation_histograms(source)
writer.write_simulated_shower_distributions(
source.simulated_shower_distributions
)

assert output_path.exists()

Expand Down
8 changes: 8 additions & 0 deletions src/ctapipe/io/tests/test_hdf5eventsource.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,3 +262,11 @@ def test_interpolate_pointing(dl1_mon_pointing_file):
for pointing in e.pointing.tel.values():
assert not np.isnan(pointing.azimuth)
assert not np.isnan(pointing.altitude)


def test_simulated_events_distribution(dl1_file):
with HDF5EventSource(dl1_file) as source:
assert len(source.simulated_shower_distributions) == 1
dist = source.simulated_shower_distributions[1]
assert dist["n_entries"] == 1000
assert dist["histogram"].sum() == 1000.0
14 changes: 14 additions & 0 deletions src/ctapipe/io/tests/test_simteleventsource.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,3 +627,17 @@ def test_override_obs_id(override_obs_id, expected_obs_id, prod5_gamma_simtel_pa

for e in s:
assert e.index.obs_id == expected_obs_id


def test_shower_distribution(prod5_gamma_simtel_path):
with SimTelEventSource(prod5_gamma_simtel_path) as source:
with pytest.warns(match="eventio file has no"):
assert source.simulated_shower_distributions == {}

for e in source:
pass

distributions = source.simulated_shower_distributions
assert len(distributions) == 1
distribution = distributions[source.obs_id]
assert distribution.n_entries == 1000
17 changes: 3 additions & 14 deletions src/ctapipe/tools/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
DataLevel,
DataWriter,
EventSource,
SimTelEventSource,
metadata,
write_table,
)
Expand Down Expand Up @@ -195,18 +194,6 @@ def setup(self):

self.event_type_filter = EventTypeFilter(parent=self)

# warn if max_events prevents writing the histograms
if (
isinstance(self.event_source, SimTelEventSource)
and self.event_source.max_events
and self.event_source.max_events > 0
):
self.log.warning(
"No Simulated shower distributions will be written because "
"EventSource.max_events is set to a non-zero number (and therefore "
"shower distributions read from the input Simulation file are invalid)."
)

@property
def should_compute_dl2(self):
"""returns true if we should compute DL2 info"""
Expand Down Expand Up @@ -328,7 +315,9 @@ def finish(self):
"""
Last steps after processing events.
"""
self.write.write_simulation_histograms(self.event_source)
shower_dists = self.event_source.simulated_shower_distributions
self.write.write_simulated_shower_distributions(shower_dists)

self._write_processing_statistics()


Expand Down
3 changes: 3 additions & 0 deletions src/ctapipe/tools/tests/test_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,9 @@ def test_read_from_simtel_and_dl1(prod5_proton_simtel_path, tmp_path):
with TableLoader(dl2_from_dl1) as loader:
events_from_dl1 = loader.read_subarray_events()

with tables.open_file(dl2_from_dl1) as f:
assert "/simulation/service/shower_distribution" in f.root

# both files should contain identical data
assert_array_equal(events_from_simtel["event_id"], events_from_dl1["event_id"])

Expand Down

0 comments on commit ccfc288

Please sign in to comment.