Skip to content

Commit

Permalink
Merge branch 'branch-24.03' of https://github.com/nv-morpheus/Morpheus
Browse files (browse the repository at this point in the history)
…into gnn-training-notebook-fix
  • Loading branch information
efajardo-nv committed Jan 8, 2024
2 parents 7d5ee7d + 2e3f4e4 commit 563d941
Show file tree
Hide file tree
Showing 29 changed files with 541 additions and 872 deletions.
4 changes: 2 additions & 2 deletions ci/conda/recipes/morpheus/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -110,7 +110,7 @@ outputs:
- {{ pin_compatible('cudatoolkit', min_pin='x.x', max_pin='x') }}
test:
requires:
- gputil
- pynvml
- pytest
- pytest-cov
- pytest-benchmark
Expand Down
1 change: 0 additions & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ dependencies:
- flake8
- gcc_linux-64=11.2
- git-lfs
- gputil
- grpcio
- gxx_linux-64=11.2
- huggingface_hub=0.10.1
Expand Down
1 change: 0 additions & 1 deletion conda/environments/dev_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ dependencies:
- flake8
- gcc_linux-64=11.2
- git-lfs
- gputil
- grpcio
- gxx_linux-64=11.2
- include-what-you-use=0.20
Expand Down
3 changes: 1 addition & 2 deletions dependencies.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -235,7 +235,6 @@ dependencies:
- dill
- elasticsearch==8.9.0
- feedparser=6.0.10
- gputil
- grpcio
- mlflow>=2.2.1,<3
- nb_conda_kernels
Expand Down
3 changes: 1 addition & 2 deletions docker/conda/environments/cuda11.8_dev.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -57,7 +57,6 @@ dependencies:
- git>=2.35.3 # Needed for wildcards on safe.directory
- glog=0.6
- gmock>=1.13.0
- gputil
- grpcio
- gtest>=1.13.0
- gxx_linux-64=11.2
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -17,7 +17,8 @@
import json
from os import path

import GPUtil
from pynvml.smi import NVSMI_QUERY_GPU
from pynvml.smi import nvidia_smi

from benchmarks.test_bench_e2e_dfp_pipeline import PIPELINES_CONF

Expand All @@ -32,18 +33,40 @@ def pytest_benchmark_update_json(config, benchmarks, output_json): # pylint:dis

curr_dir = path.dirname(path.abspath(__file__))

gpus = GPUtil.getGPUs()

for i, gpu in enumerate(gpus):
# output_json["machine_info"]["gpu_" + str(i)] = gpu.name
output_json["machine_info"]["gpu_" + str(i)] = {}
output_json["machine_info"]["gpu_" + str(i)]["id"] = gpu.id
output_json["machine_info"]["gpu_" + str(i)]["name"] = gpu.name
output_json["machine_info"]["gpu_" + str(i)]["load"] = f"{gpu.load*100}%"
output_json["machine_info"]["gpu_" + str(i)]["free_memory"] = f"{gpu.memoryFree}MB"
output_json["machine_info"]["gpu_" + str(i)]["used_memory"] = f"{gpu.memoryUsed}MB"
output_json["machine_info"]["gpu_" + str(i)]["temperature"] = f"{gpu.temperature} C"
output_json["machine_info"]["gpu_" + str(i)]["uuid"] = gpu.uuid
query_opts = NVSMI_QUERY_GPU.copy()
nvsmi = nvidia_smi.getInstance()
device_query = nvsmi.DeviceQuery([
query_opts["driver_version"],
query_opts["count"],
query_opts["index"],
query_opts["gpu_name"],
query_opts["gpu_uuid"],
query_opts["memory.total"],
query_opts["memory.used"],
query_opts["memory.free"],
query_opts["utilization.gpu"],
query_opts["utilization.memory"],
query_opts["temperature.gpu"]
])

output_json["machine_info"]["gpu_driver_version"] = device_query["driver_version"]

for gpu in device_query["gpu"]:
gpu_num = gpu["minor_number"]
output_json["machine_info"]["gpu_" + gpu_num] = {}
output_json["machine_info"]["gpu_" + gpu_num]["id"] = gpu_num
output_json["machine_info"]["gpu_" + gpu_num]["name"] = gpu["product_name"]
output_json["machine_info"][
"gpu_" + gpu_num]["utilization"] = f"{gpu['utilization']['gpu_util']}{gpu['utilization']['unit']}"
output_json["machine_info"][
"gpu_" + gpu_num]["total_memory"] = f"{gpu['fb_memory_usage']['total']} {gpu['fb_memory_usage']['unit']}"
output_json["machine_info"][
"gpu_" + gpu_num]["used_memory"] = f"{gpu['fb_memory_usage']['used']} {gpu['fb_memory_usage']['unit']}"
output_json["machine_info"][
"gpu_" + gpu_num]["free_memory"] = f"{gpu['fb_memory_usage']['free']} {gpu['fb_memory_usage']['unit']}"
output_json["machine_info"][
"gpu_" + gpu_num]["temperature"] = f"{gpu['temperature']['gpu_temp']} {gpu['temperature']['unit']}"
output_json["machine_info"]["gpu_" + gpu_num]["uuid"] = gpu["uuid"]

for bench in output_json['benchmarks']:

Expand Down
2 changes: 1 addition & 1 deletion examples/log_parsing/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ PYTHONPATH="examples/log_parsing" \
morpheus --log_level INFO \
--plugin "inference" \
--plugin "postprocessing" \
run --num_threads 1 --use_cpp False --pipeline_batch_size 1024 --model_max_batch_size 32 \
run --num_threads 1 --pipeline_batch_size 1024 --model_max_batch_size 32 \
pipeline-nlp \
from-file --filename ./models/datasets/validation-data/log-parsing-validation-data-input.csv \
deserialize \
Expand Down
Loading

0 comments on commit 563d941

Please sign in to comment.