[Cherry-Pick-Main][Server][SDL-5812] Updating project to allow locking/unlocking the schema. Adds support for a timestamp column #56

Merged 1 commit on Jan 16, 2025
18 changes: 18 additions & 0 deletions src/server/datamanager/migrations/0087_lock_schema.py
@@ -0,0 +1,18 @@
# Generated by Django 3.2.12 on 2024-11-08 04:55

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("datamanager", "0086_label_info"),
]

operations = [
migrations.AddField(
model_name="project",
name="lock_schema",
field=models.BooleanField(default=False),
),
]
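A minimal sketch of applying the new migration programmatically, assuming a configured DJANGO_SETTINGS_MODULE (the usual route is simply manage.py migrate):

    import django
    from django.core.management import call_command

    django.setup()  # requires DJANGO_SETTINGS_MODULE to point at the server settings
    call_command("migrate", "datamanager", "0087_lock_schema")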
1 change: 1 addition & 0 deletions src/server/datamanager/models.py
@@ -248,6 +248,7 @@ class Project(models.Model):
image_file_name = models.CharField(
max_length=1100, null=True, unique=True, default=None
)
lock_schema = models.BooleanField(default=False)
last_modified = models.DateTimeField(auto_now=True)

def __unicode__(self):
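With the new BooleanField in place, locking or unlocking a project's schema is a plain model update. A minimal sketch using the ORM, mirroring how the tests below toggle the flag (the project lookup is illustrative):

    from datamanager.models import Project

    project = Project.objects.get(uuid=project_uuid)  # project_uuid supplied by the caller
    project.lock_schema = True  # uploads must now match the stored capture_sample_schema
    project.save(update_fields=["lock_schema"])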
7 changes: 6 additions & 1 deletion src/server/datamanager/query.py
@@ -311,7 +311,12 @@ def get_capture_file(project_uuid: str, capture_file: str, ext: str) -> DataFrame:
datastore.get("{}".format(os.path.basename(capture_file)), capture_file)

if ext == ".csv":
tmp_df = read_csv(capture_file, index_col="sequence")
cols = list(read_csv(capture_file, nrows=1))
tmp_df = read_csv(
capture_file,
index_col="sequence",
usecols=[col for col in cols if col != "timestamp"],
)
elif ext == ".wav":
with wave.open(capture_file, "rb") as wave_reader:
waveFrames = wave_reader.readframes(wave_reader.getnframes())
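The change reads only the header row first, then re-reads the file while excluding the timestamp column, so callers of get_capture_file keep receiving an all-numeric DataFrame indexed by sequence. A standalone pandas sketch of the same pattern (the file path is illustrative):

    from pandas import read_csv

    cols = list(read_csv("capture.csv", nrows=1))  # header row only
    df = read_csv(
        "capture.csv",
        index_col="sequence",
        usecols=[col for col in cols if col != "timestamp"],  # drop the timestamp column
    )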
9 changes: 3 additions & 6 deletions src/server/datamanager/serializers/capture.py
@@ -79,19 +79,16 @@ def validate_capture_file(capture, tmp_name):
for index, (key, item) in enumerate(
capture.project.capture_sample_schema.items()
):
if item.get("index") is None:
item["index"] = index
if item.get("index"):
item.pop("index")
update_capture_sample_schema = True

for key in reader.schema.keys():
if key not in capture.project.capture_sample_schema:
capture.project.capture_sample_schema[key] = reader.schema[key]
update_capture_sample_schema = True

if (
not settings.ALLOW_UPDATE_PROJECT_SCHEMA
and capture.project.capture_sample_schema
):
if capture.project.lock_schema:
project_columns = sorted(list(capture.project.capture_sample_schema.keys()))
capture_columns = sorted(list(reader.schema.keys()))
if project_columns != capture_columns:
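The per-project lock_schema flag replaces the global ALLOW_UPDATE_PROJECT_SCHEMA setting here: when the flag is set, an upload whose columns differ from the stored schema is rejected. A hedged sketch of that check; the diff cuts off before the error handling, so the raised exception and message are assumptions:

    from rest_framework.serializers import ValidationError

    if capture.project.lock_schema:
        project_columns = sorted(capture.project.capture_sample_schema.keys())
        capture_columns = sorted(reader.schema.keys())
        if project_columns != capture_columns:
            raise ValidationError("Capture columns do not match the locked project schema.")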
1 change: 1 addition & 0 deletions src/server/datamanager/serializers/serializers.py
@@ -300,6 +300,7 @@ class Meta:
"created_at",
"active_pipelines",
"description",
"lock_schema",
"last_modified",
)
read_only_fields = (
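Since lock_schema is added to the serializer's fields (and not, as far as this hunk shows, to read_only_fields), clients should be able to toggle it over the REST API. A minimal sketch with the DRF test client; the URL and authentication setup are assumptions, not part of this diff:

    from rest_framework.test import APIClient

    client = APIClient()
    client.force_authenticate(user=user)  # an existing team member's user
    response = client.patch(
        f"/project/{project_uuid}/",  # illustrative; resolve the real route with reverse()
        data={"lock_schema": True},
        format="json",
    )
    assert response.status_code == 200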
24 changes: 12 additions & 12 deletions src/server/datamanager/tests/utils/test_file_reader.py
@@ -114,36 +114,36 @@ def test_csv_reader_data_mixed(filereader_mixed_int_float):
def test_csv_reader_schema_int_float(filereader_mixed_int_float):
schema = filereader_mixed_int_float.schema
assert {
"AccelerometerX": {"type": "int", "index": 0},
"AccelerometerY": {"type": "float", "index": 1},
"GyroscopeZ": {"type": "float", "index": 2},
"AccelerometerX": {"type": "int"},
"AccelerometerY": {"type": "float"},
"GyroscopeZ": {"type": "float"},
} == schema


def test_csv_reader_schema_float(filereader_float):
schema = filereader_float.schema
assert {
"AccelerometerX": {"type": "float", "index": 0},
"AccelerometerY": {"type": "float", "index": 1},
"GyroscopeZ": {"type": "float", "index": 2},
"AccelerometerX": {"type": "float"},
"AccelerometerY": {"type": "float"},
"GyroscopeZ": {"type": "float"},
} == schema


def test_csv_reader_schema_int(filereader_int):
schema = filereader_int.schema
assert {
"AccelerometerX": {"type": "int", "index": 0},
"AccelerometerY": {"type": "int", "index": 1},
"GyroscopeZ": {"type": "int", "index": 2},
"AccelerometerX": {"type": "int"},
"AccelerometerY": {"type": "int"},
"GyroscopeZ": {"type": "int"},
} == schema


def test_csv_reader_schema_int_space_fields(filereader_int_name_space_fields):
schema = filereader_int_name_space_fields.schema
assert {
"Accelerometer_X": {"type": "int", "index": 0},
"AccelerometerY": {"type": "int", "index": 1},
"GyroscopeZ": {"type": "int", "index": 2},
"Accelerometer_X": {"type": "int"},
"AccelerometerY": {"type": "int"},
"GyroscopeZ": {"type": "int"},
} == schema


100 changes: 100 additions & 0 deletions src/server/datamanager/tests/views/data/packet_loss_with_timestamp.csv
@@ -0,0 +1,100 @@
timestamp,sequence,AccelerometerX,AccelerometerY,AccelerometerZ,GyroscopeX,GyroscopeY,GyroscopeZ
r,1000,-158,313,4173,0,-2,-226
r,1001,-146,368,4192,15,-5,-332
r,1002,-282,278,4120,-93,56,-408
r,1003,-332,179,4159,-19,14,-333
r,1004,-181,170,4166,29,-13,-153
r,1005,-148,154,4175,12,0,-51
r,1006,-197,234,4166,9,-1,-8
r,1007,-225,263,4160,10,-3,1
r,1008,-218,252,4154,10,-2,-1
r,1009,-215,254,4159,10,-3,-1
r,1010,-221,248,4180,9,-2,-1
r,1011,-217,250,4166,10,-4,-4
r,1012,-215,255,4165,9,-3,-3
r,1013,-217,256,4169,9,-2,-2
r,1014,-215,262,4163,10,-3,-2
r,1015,-213,264,4177,11,-2,-2
r,1016,-214,255,4161,10,-2,-2
r,1017,-213,248,4163,10,-2,-1
r,1018,-218,250,4172,10,-3,-1
r,1019,-211,254,4169,11,-4,-1
r,1020,-213,256,4169,10,-2,-3
r,1021,-220,250,4158,10,-3,-2
r,1022,-212,258,4168,10,-3,-2
r,1023,-202,257,4169,9,-2,-2
r,1024,-210,258,4161,9,-3,-2
r,1025,-215,256,4165,10,-4,-3
r,1026,-213,256,4163,11,-1,-2
r,1027,-210,256,4169,10,-2,-1
r,1028,-216,257,4164,9,-2,-1
r,1029,-217,253,4167,10,-1,-3
r,1030,-215,260,4171,11,-1,-2
r,1031,-213,255,4174,11,-3,0
r,1032,-214,256,4164,9,-3,-1
r,1033,-211,253,4169,8,-3,-2
r,1034,-209,253,4163,9,-2,-2
r,1035,-215,253,4159,9,-2,-2
r,1036,-215,253,4166,10,-4,-2
r,1037,-214,258,4161,11,-4,-2
r,1038,-213,256,4163,10,-3,-1
r,1039,-211,256,4163,10,-3,-2
r,1040,-212,255,4164,9,-2,-1
r,1041,-218,256,4172,9,-3,0
r,1042,-215,253,4171,10,-4,0
r,1043,-211,253,4164,10,-1,-1
r,1044,-215,260,4158,11,-2,-2
r,1045,-212,248,4169,10,-3,-3
r,1046,-213,256,4162,11,-2,-1
r,1047,-214,259,4160,10,-3,-1
r,1048,-217,256,4161,10,-4,-1
r,1049,-217,254,4175,11,-4,-2
r,1050,-210,256,4172,11,-4,-3
r,1051,-211,264,4162,10,-4,-3
r,1052,-210,259,4163,10,-3,-1
r,1053,-205,253,4168,9,-3,0
r,1054,-221,250,4164,10,-2,-2
r,1055,-221,255,4156,10,-3,-3
r,1056,-213,259,4162,10,-2,-1
r,1057,-218,254,4176,10,-2,-1
r,1058,-216,250,4167,9,-2,-1
r,1059,-206,257,4165,9,-2,-3
r,1060,-212,255,4161,10,-1,-1
r,1061,-214,252,4173,10,-2,-2
r,1062,-217,255,4188,10,-2,-2
r,1063,-224,255,4191,10,-2,-1
r,1064,-218,258,4180,9,-2,-3
r,1065,-221,256,4157,9,-3,-1
r,1066,-208,258,4147,10,-4,-1
r,1067,-211,257,4157,10,-2,-1
r,1068,-214,257,4166,10,-2,-2
r,1069,-209,253,4181,10,-2,-2
r,1070,-213,256,4179,12,-1,-2
r,1071,-212,257,4168,11,-3,-2
r,1072,-205,255,4155,9,-3,-1
r,1073,-215,261,4162,11,-2,-1
r,1074,-223,261,4168,10,-2,-2
r,1075,-212,264,4175,10,-2,-1
r,1076,-217,257,4187,9,-2,-2
r,1077,-214,258,4184,10,-4,-2
r,1078,-220,264,4171,9,-3,-2
r,1079,-211,284,4164,10,-2,-11
r,1080,-206,268,4163,11,-2,-58
r,1081,-222,233,4155,10,-4,-66
r,1082,-217,252,4164,11,-3,-17
r,1083,-214,261,4173,10,-3,3
r,1084,-217,258,4181,10,-3,-2
r,1086,-210,258,4171,10,-3,-2
r,1087,-203,253,4158,9,-2,-2
r,1088,-209,249,4167,10,-3,-2
r,1089,-215,252,4173,10,-2,-1
r,1090,-219,263,4172,10,-3,-1
r,1092,-205,258,4184,11,-4,0
r,1093,-211,261,4163,10,-3,-1
r,1094,-219,258,4166,10,-4,-2
r,1095,-217,251,4162,10,-3,-3
r,1096,-213,257,4164,10,-3,-2
r,1097,-210,254,4168,9,-4,-3
r,1098,-224,252,4169,10,-2,-2
r,1099,-219,261,4173,11,-3,-2
r,1120,-212,263,4167,10,-3,-3
92 changes: 79 additions & 13 deletions src/server/datamanager/tests/views/test_capture.py
@@ -190,6 +190,62 @@ def test_create_csv_with_packet_loss(self, client, project):
assert r["name"] == "test_packet.csv"
assert r["file_size"] == 2943

def test_create_csv_with_timestamp(self, client, project):
project.save()
settings.DEBUG = True

capture_list_url = reverse(
"capture-list",
kwargs={"project_uuid": project.uuid},
)

dirname = os.path.dirname(__file__)

template_path = os.path.join(dirname, "data/packet_loss_with_timestamp.csv")
with open(template_path, "rb") as f:
response = client.post(
capture_list_url,
format="multipart",
data={"file": f, "name": "test_packet.csv"},
)

assert response.status_code == status.HTTP_201_CREATED

r = response.json()

assert r["max_sequence"] == 1120
assert r["number_samples"] == 99
assert r["name"] == "test_packet.csv"
assert r["file_size"] == 3151

from datamanager.query import get_capture_file

capture = Capture.objects.get(name="test_packet.csv")
capture_df = get_capture_file(project.uuid, capture.file, ".csv")
assert "timestamp" not in capture_df.columns

from engine.base import pipeline_utils
from datamanager.models import TeamMember

user = TeamMember.objects.get(email="unittest@sensiml.com").user
capture_df_pipeline_utils, _, _ = pipeline_utils.get_capturefile(
user, project.uuid, capture.name
)

assert "timestamp" not in capture_df_pipeline_utils.columns

capture_list_url = reverse(
"capture-file",
kwargs={"project_uuid": project.uuid, "uuid": capture.uuid},
)

response = client.get(capture_list_url)

assert (
response.data
== b"timestamp,sequence,AccelerometerX,AccelerometerY,AccelerometerZ,GyroscopeX,GyroscopeY,GyroscopeZ\r\nr,1000,-158,313,4173,0,-2,-226\r\nr,1001,-146,368,4192,15,-5,-332\r\nr,1002,-282,278,4120,-93,56,-408\r\nr,1003,-332,179,4159,-19,14,-333\r\nr,1004,-181,170,4166,29,-13,-153\r\nr,1005,-148,154,4175,12,0,-51\r\nr,1006,-197,234,4166,9,-1,-8\r\nr,1007,-225,263,4160,10,-3,1\r\nr,1008,-218,252,4154,10,-2,-1\r\nr,1009,-215,254,4159,10,-3,-1\r\nr,1010,-221,248,4180,9,-2,-1\r\nr,1011,-217,250,4166,10,-4,-4\r\nr,1012,-215,255,4165,9,-3,-3\r\nr,1013,-217,256,4169,9,-2,-2\r\nr,1014,-215,262,4163,10,-3,-2\r\nr,1015,-213,264,4177,11,-2,-2\r\nr,1016,-214,255,4161,10,-2,-2\r\nr,1017,-213,248,4163,10,-2,-1\r\nr,1018,-218,250,4172,10,-3,-1\r\nr,1019,-211,254,4169,11,-4,-1\r\nr,1020,-213,256,4169,10,-2,-3\r\nr,1021,-220,250,4158,10,-3,-2\r\nr,1022,-212,258,4168,10,-3,-2\r\nr,1023,-202,257,4169,9,-2,-2\r\nr,1024,-210,258,4161,9,-3,-2\r\nr,1025,-215,256,4165,10,-4,-3\r\nr,1026,-213,256,4163,11,-1,-2\r\nr,1027,-210,256,4169,10,-2,-1\r\nr,1028,-216,257,4164,9,-2,-1\r\nr,1029,-217,253,4167,10,-1,-3\r\nr,1030,-215,260,4171,11,-1,-2\r\nr,1031,-213,255,4174,11,-3,0\r\nr,1032,-214,256,4164,9,-3,-1\r\nr,1033,-211,253,4169,8,-3,-2\r\nr,1034,-209,253,4163,9,-2,-2\r\nr,1035,-215,253,4159,9,-2,-2\r\nr,1036,-215,253,4166,10,-4,-2\r\nr,1037,-214,258,4161,11,-4,-2\r\nr,1038,-213,256,4163,10,-3,-1\r\nr,1039,-211,256,4163,10,-3,-2\r\nr,1040,-212,255,4164,9,-2,-1\r\nr,1041,-218,256,4172,9,-3,0\r\nr,1042,-215,253,4171,10,-4,0\r\nr,1043,-211,253,4164,10,-1,-1\r\nr,1044,-215,260,4158,11,-2,-2\r\nr,1045,-212,248,4169,10,-3,-3\r\nr,1046,-213,256,4162,11,-2,-1\r\nr,1047,-214,259,4160,10,-3,-1\r\nr,1048,-217,256,4161,10,-4,-1\r\nr,1049,-217,254,4175,11,-4,-2\r\nr,1050,-210,256,4172,11,-4,-3\r\nr,1051,-211,264,4162,10,-4,-3\r\nr,1052,-210,259,4163,10,-3,-1\r\nr,1053,-205,253,4168,9,-3,0\r\nr,1054,-221,250,4164,10,-2,-2\r\nr,1055,-221,255,4156,10,-3,-3\r\nr,1056,-213,259,4162,10,-2,-1\r\nr,1057,-218,254,4176,10,-2,-1\r\nr,1058,-216,250,4167,9,-2,-1\r\nr,1059,-206,257,4165,9,-2,-3\r\nr,1060,-212,255,4161,10,-1,-1\r\nr,1061,-214,252,4173,10,-2,-2\r\nr,1062,-217,255,4188,10,-2,-2\r\nr,1063,-224,255,4191,10,-2,-1\r\nr,1064,-218,258,4180,9,-2,-3\r\nr,1065,-221,256,4157,9,-3,-1\r\nr,1066,-208,258,4147,10,-4,-1\r\nr,1067,-211,257,4157,10,-2,-1\r\nr,1068,-214,257,4166,10,-2,-2\r\nr,1069,-209,253,4181,10,-2,-2\r\nr,1070,-213,256,4179,12,-1,-2\r\nr,1071,-212,257,4168,11,-3,-2\r\nr,1072,-205,255,4155,9,-3,-1\r\nr,1073,-215,261,4162,11,-2,-1\r\nr,1074,-223,261,4168,10,-2,-2\r\nr,1075,-212,264,4175,10,-2,-1\r\nr,1076,-217,257,4187,9,-2,-2\r\nr,1077,-214,258,4184,10,-4,-2\r\nr,1078,-220,264,4171,9,-3,-2\r\nr,1079,-211,284,4164,10,-2,-11\r\nr,1080,-206,268,4163,11,-2,-58\r\nr,1081,-222,233,4155,10,-4,-66\r\nr,1082,-217,252,4164,11,-3,-17\r\nr,1083,-214,261,4173,10,-3,3\r\nr,1084,-217,258,4181,10,-3,-2\r\nr,1086,-210,258,4171,10,-3,-2\r\nr,1087,-203,253,4158,9,-2,-2\r\nr,1088,-209,249,4167,10,-3,-2\r\nr,1089,-215,252,4173,10,-2,-1\r\nr,1090,-219,263,4172,10,-3,-1\r\nr,1092,-205,258,4184,11,-4,0\r\nr,1093,-211,261,4163,10,-3,-1\r\nr,1094,-219,258,4166,10,-4,-2\r\nr,1095,-217,251,4162,10,-3,-3\r\nr,1096,-213,257,4164,10,-3,-2\r\nr,1097,-210,254,4168,9,-4,-3\r\nr,1098,-224,252,4169,10,-2,-2\r\nr,1099,-219,261,4173,11,-3,-2\r\nr,1120,-212,263,4167,10,-3,-3"
)

def test_create_csv_then_upload_wave(self, client, project):
project.save()
settings.DEBUG = True
@@ -210,8 +266,6 @@ def test_create_csv_then_upload_wave(self, client, project):
)
assert response.status_code == status.HTTP_201_CREATED

settings.ALLOW_UPDATE_PROJECT_SCHEMA = True

template_path = os.path.join(dirname, "data/on_4c77947d_nohash_0.wav")
with open(template_path, "rb") as f:
response = client.post(
@@ -231,14 +285,25 @@
},
}

settings.ALLOW_UPDATE_PROJECT_SCHEMA = False
template_path = os.path.join(dirname, "data/on_4c77947d_nohash_0.wav")
with open(template_path, "rb") as f:
response = client.post(
capture_list_url,
format="multipart",
data={"file": f, "name": "window_test2.wav"},
)

assert response.status_code == status.HTTP_201_CREATED

project.lock_schema = True
project.save(update_fields=["lock_schema"])

template_path = os.path.join(dirname, "data/on_4c77947d_nohash_0.wav")
with open(template_path, "rb") as f:
response = client.post(
capture_list_url,
format="multipart",
data={"file": f, "name": "window_test_2.wav"},
data={"file": f, "name": "window_test_3.wav"},
)

assert response.status_code == status.HTTP_400_BAD_REQUEST
@@ -250,14 +315,15 @@ def test_create_csv_then_upload_wave(self, client, project):
],
}

settings.ALLOW_UPDATE_PROJECT_SCHEMA = True
project.lock_schema = False
project.save(update_fields=["lock_schema"])

template_path = os.path.join(dirname, "data/on_4c77947d_nohash_0.wav")
with open(template_path, "rb") as f:
response = client.post(
capture_list_url,
format="multipart",
data={"file": f, "name": "window_test_2.wav"},
data={"file": f, "name": "window_test_4.wav"},
)

assert response.status_code == status.HTTP_201_CREATED
@@ -266,13 +332,13 @@ def test_create_csv_then_upload_wave(self, client, project):

assert response.status_code == status.HTTP_200_OK
assert response.json()["capture_sample_schema"] == {
"channel_0": {"type": "int", "index": 0},
"GyroscopeX": {"type": "int", "index": 3},
"GyroscopeY": {"type": "int", "index": 4},
"GyroscopeZ": {"type": "int", "index": 5},
"AccelerometerX": {"type": "int", "index": 0},
"AccelerometerY": {"type": "int", "index": 1},
"AccelerometerZ": {"type": "int", "index": 2},
"channel_0": {"type": "int"},
"GyroscopeX": {"type": "int"},
"GyroscopeY": {"type": "int"},
"GyroscopeZ": {"type": "int"},
"AccelerometerX": {"type": "int"},
"AccelerometerY": {"type": "int"},
"AccelerometerZ": {"type": "int"},
}

def test_capture_stats_api_base(
13 changes: 7 additions & 6 deletions src/server/datamanager/utils/file_reader.py
@@ -50,18 +50,22 @@ def make_schema(dataframe):
invalid_columns = []
schema = {}
for index, dtype in enumerate(dataframe.dtypes):
if dtype not in ["int64", "float64"]:
if (
dtype not in ["int64", "float64"]
and dataframe.columns[index] != "timestamp"
):
invalid_columns.append(dataframe.columns[index])

column_dtype = None
if dtype in ["int64"]:
column_dtype = "int"
elif dtype in ["string"]:
column_dtype = "string"
elif dtype in ["float64"]:
column_dtype = "float"

schema[dataframe.columns[index].replace(" ", "_")] = {
"type": column_dtype,
"index": index,
}

if invalid_columns:
@@ -154,10 +158,7 @@ def __init__(self, file_path):
index="sequence"
)

self._schema = {
key: {"type": "int16", "index": index}
for index, key in enumerate(columns)
}
self._schema = {key: {"type": "int16"} for index, key in enumerate(columns)}

def to_CSVFileReader(self, tmp_file_path):
self._dataframe.to_csv(tmp_file_path, index=None)
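After this change the stored schema is keyed only by column name and type: the positional "index" entry is gone, and a timestamp column is no longer flagged as invalid. A simplified sketch of the same dtype mapping (unlike make_schema, it raises on the first bad column instead of collecting them):

    import pandas as pd

    def build_schema(df: pd.DataFrame) -> dict:
        type_map = {"int64": "int", "float64": "float", "string": "string"}
        schema = {}
        for column, dtype in df.dtypes.items():
            if str(dtype) not in type_map and column != "timestamp":
                raise ValueError(f"Unsupported column type for {column}: {dtype}")
            schema[column.replace(" ", "_")] = {"type": type_map.get(str(dtype))}
        return schema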