diff --git a/.idea/SET2023.iml b/.idea/SET2023.iml index c1b03b6..3a67b25 100644 --- a/.idea/SET2023.iml +++ b/.idea/SET2023.iml @@ -2,7 +2,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 0291e62..6e7f6fc 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,5 +3,5 @@ - + \ No newline at end of file diff --git a/computer_vision/best_trash.pt b/computer_vision/best_trash.pt new file mode 100644 index 0000000..134fea9 Binary files /dev/null and b/computer_vision/best_trash.pt differ diff --git a/computer_vision/trash_detection.py b/computer_vision/trash_detection.py new file mode 100644 index 0000000..3a644b0 --- /dev/null +++ b/computer_vision/trash_detection.py @@ -0,0 +1,63 @@ +from ultralytics import YOLO +from PIL import Image +from http.server import BaseHTTPRequestHandler, HTTPServer +import io +import cv2 +import time +import requests + + +image_count = 0 +model = YOLO('best_trash.pt') + +def predict_image(image_path): + + # model.predict(image_path, save=True, imgsz=1280, conf=0.25, show_labels=True, show_conf=True, iou=0.5, + # line_width=3) + + #plot stuff onto image + im = cv2.imread(image_path) + result = model(im, imgsz=1280, conf=0.15, show_labels=True, show_conf=True, iou=0.5, line_width=3) + annotated_frame = result[0].plot() + # cv2.imshow('Result', annotated_frame) + # if cv2.waitKey(1) & 0xFF==ord("q"): + # return + # cv2.destroyAllWindows() + # cv2.waitKey(0) + + + # result boxes: all coords: x1, y1, x2, y2 + # result_boxes = result[0].boxes.xyxy.cpu().detach().numpy() + return result[0].tojson() + + +class HTTPRequestHandler(BaseHTTPRequestHandler): + + # POST method handler + def do_POST(self): + print("Got post") + global image_count + content_length = int(self.headers['Content-Length']) + post_data = self.rfile.read(content_length) + image_filename = 'images/' + str(image_count) + '.jpg' + image_count += 1 + with open(image_filename, 'wb') as f: + f.write(post_data) + + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + prediction = predict_image(image_filename) + print(prediction) + self.wfile.write(prediction.encode('utf-8')) + + +def main(): + server_address = ('', 8001) + httpd = HTTPServer(server_address, HTTPRequestHandler) + print('start server') + httpd.serve_forever() + + +if __name__ == "__main__": + main() diff --git a/computer_vision/trash_detection_client.py b/computer_vision/trash_detection_client.py new file mode 100644 index 0000000..72701aa --- /dev/null +++ b/computer_vision/trash_detection_client.py @@ -0,0 +1,28 @@ +import cv2, shutil + +def capture_image(): + # Open the camera + cap = cv2.VideoCapture(0) + + # Check if the camera is opened successfully + if not cap.isOpened(): + print("Error: Unable to open camera.") + return + + # Capture a frame + ret, frame = cap.read() + + if ret: + # Save the captured frame as an image + cv2.imwrite("frame.jpg", frame) + # Send frame to trash_detection.py + shutil.copy("frame.jpg", "trash_detection.pg") + else: + print("Error: Unable to capture image.") + + # Release the camera + cap.release() + +robot_on = True +while robot_on: + capture_image() \ No newline at end of file diff --git a/demo_computervision_display/index.html b/demo_computervision_display/index.html new file mode 100644 index 0000000..ff16a3a --- /dev/null +++ b/demo_computervision_display/index.html @@ -0,0 +1,40 @@ + + + + + + + Image Fetch + + + + Dynamic Image + + + diff --git a/demo_computervision_display/server.py b/demo_computervision_display/server.py new file mode 100644 index 0000000..ffeab43 --- /dev/null +++ b/demo_computervision_display/server.py @@ -0,0 +1,46 @@ +from http.server import BaseHTTPRequestHandler, HTTPServer +import os +from pathlib import Path + + +class HTTPRequestHandler(BaseHTTPRequestHandler): + + # POST method handler + def do_GET(self): + directory = '../computer_vision/images/' # Adjust the path to your image directory + + try: + # Get all image files in the directory + files = list(Path(directory).glob('*')) + # Filter files to only get those that are valid images + image_files = [file for file in files if file.suffix.lower() in ['.jpg', '.jpeg', '.png', '.gif', '.bmp']] + print(image_files) + + # Find the oldest file based on creation time + oldest_file = max(image_files, key=os.path.getctime, default=None) + + if oldest_file is None: + self.send_error(404, "No image files found.") + return + + print(oldest_file) + + # Open the oldest image file and send it + with open(oldest_file, 'rb') as file: + self.send_response(200) + self.send_header('Content-type', 'image/jpeg') # Change if using different image types + self.end_headers() + self.wfile.write(file.read()) + except Exception as e: + self.send_error(500, f"Server Error: {e}") + + +def run(server_class=HTTPServer, handler_class=HTTPRequestHandler, port=8000): + server_address = ('', port) + httpd = server_class(server_address, handler_class) + print(f"Server starting on port {port}...") + httpd.serve_forever() + + +if __name__ == "__main__": + run() diff --git a/image_listener.py b/image_listener.py index f0215b5..2f4c255 100644 --- a/image_listener.py +++ b/image_listener.py @@ -2,7 +2,7 @@ # 1/23/2024 import time -import rospy +# import rospy import cv2 from std_msgs.msg import String from sensor_msgs.msg import Image diff --git a/main.py b/main.py index 58d3f9d..289aad0 100644 --- a/main.py +++ b/main.py @@ -1,29 +1,153 @@ +import os +from enum import IntEnum +import time +import shutil +import math + +from PIL import Image import cv2 -import serial +# import serial +import requests +from pp.distance_measurement import bounding_box_angle, distance_estimation +from pp.config import * + """ camera navigation movement sensors (lidar) navigation algorithm + + +e-meet camera +horizontal: 68.61668666 degrees +vertical: 65.68702522 z +at 640x480 """ +class RobotState(IntEnum): + SEEKING_TRASH = 0 + MOVING_TO_TRASH = 1 + AT_TRASH = 2 + + +class TrashTarget: + def __init__(self, horizontal_angle: float, distance: float): + self.horizontal_angle = horizontal_angle + self.distance = distance + + +class RobotStateVariables: + def __init__(self, robot_state: RobotState, trash_target: TrashTarget): + self.robot = Robot() + self.robot_state = robot_state + self.trash_target = trash_target + + +class Robot: + def rotate(self, angle_rad: float): + pass + + def move_forward(self, distance_meters: float): + pass + + def collect_trash(self): + pass + + +image_count = 0 +YOLO_INFERENCE_SERVER_ADDRESS = os.environ.get('YOLO_INFERENCE_SERVER_ADDRESS') +IMAGE_SAVE_DIR = os.environ.get('IMAGE_SAVE_DIR') + + +def setup(): + + if not os.path.exists(IMAGE_SAVE_DIR): + os.makedirs(IMAGE_SAVE_DIR) + else: + for filename in os.listdir(IMAGE_SAVE_DIR): + file_path = os.path.join(IMAGE_SAVE_DIR, filename) + try: + if os.path.isfile(file_path) or os.path.islink(file_path): + os.unlink(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) + except Exception as e: + print(f'Failed to delete {file_path}. Reason: {e}') + # make image saving directory if not already + # if it exists already clear it + # check if yolo inference server is running + + pass + + +def read_image(cap: cv2.VideoCapture, save_directory): + global image_count + _, frame = cap.read() + im = Image.fromarray(frame) + save_path = save_directory + '/' + str(image_count) + '.jpg' + image_count += 1 + im.save(save_path) + return save_path + + +def do_yolo_inference(image_path: str) -> dict: + with open(image_path, 'rb') as f: + data = f.read() + + print("Sending image") + r = requests.post(YOLO_INFERENCE_SERVER_ADDRESS, data=data) + resp = r.json() + return resp + + +def do_seeking_trash(robot_state: RobotStateVariables, cap: cv2.VideoCapture): + image_path = read_image(cap, IMAGE_SAVE_DIR) + yolo_results = do_yolo_inference(image_path) + if len(yolo_results) == 0: + return + first_piece_of_trash = yolo_results[0] + box = first_piece_of_trash['box'] + x1, y1, x2, y2 = box['x1'], box['y1'], box['x2'], box['y2'] + p = ((x1 + x2) / 2, y1) + res1 = bounding_box_angle([p], CAMERA_DIMENSIONS_PIXELS, CAMERA_FOV_RADIANS) + res2 = distance_estimation(res1[0], CAMERA_HEIGHT_ABOVE_GROUND_METERS) + + robot_state.trash_target = TrashTarget(res1[0][0], math.sqrt(pow(res2[0], 2) + pow(res2[1], 2))) + + +def do_moving_to_trash(robot_state: RobotStateVariables, cap: cv2.VideoCapture): + if robot_state.trash_target is None: + robot_state.robot_state = RobotState(0) + return + + robot_state.robot.rotate(robot_state.trash_target.horizontal_angle) + robot_state.robot.move_forward(robot_state.trash_target.distance) + + +def do_at_trash(robot_state: RobotStateVariables, cap: cv2.VideoCapture): + robot_state.robot.collect_trash() + + + +handler_functions = { + 0: do_seeking_trash, + 1: do_moving_to_trash, + 2: do_at_trash, +} + + def main(): - ser = serial.Serial('/dev/cu.usbmodem101', 9600) - - try: - distance = True - while True: - data = ser.readline().decode().strip() - if distance: - print("Distance:", data) - distance = not distance - else: - print("Strength:", data) - distance = not distance - except KeyboardInterrupt: - ser.close() + cap = cv2.VideoCapture('/dev/video0') + cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) + state = RobotStateVariables(RobotState(0), None) + setup() + + # main loop + while True: + (handler_functions[state.robot_state])(state, cap) + time.sleep(1) if __name__ == "__main__": diff --git a/periodic_image_capture.py b/periodic_image_capture.py index bbec1a7..5919a73 100644 --- a/periodic_image_capture.py +++ b/periodic_image_capture.py @@ -4,7 +4,7 @@ import cv2 import os import time -import rospy +# import rospy from sensor_msgs.msg import Image from cv_bridge import CvBridge diff --git a/pp/config.py b/pp/config.py index b885a3e..268b526 100644 --- a/pp/config.py +++ b/pp/config.py @@ -1,6 +1,6 @@ import math -CAMERA_FOV_RADIANS = (52.31 * math.pi / 180, 51.83 * math.pi / 180) +CAMERA_FOV_RADIANS = (68.61668666 * math.pi / 180, 65.68702522 * math.pi / 180) # horizontal, vertical CAMERA_DIMENSIONS_PIXELS = (640, 480) -CAMERA_HEIGHT_ABOVE_GROUND_METERS = 0.263 +CAMERA_HEIGHT_ABOVE_GROUND_METERS = .084 diff --git a/pp/distance_measurement.py b/pp/distance_measurement.py index 417096a..edfd0f3 100644 --- a/pp/distance_measurement.py +++ b/pp/distance_measurement.py @@ -3,10 +3,9 @@ import math import cv2 from typing import List, Tuple +from .config import * + -CAMERA_FOV_RADIANS = (52.31 * math.pi / 180, 51.83 * math.pi / 180) -CAMERA_DIMENSIONS_PIXELS = (640, 480) -CAMERA_HEIGHT_ABOVE_GROUND_METERS = 0.263 def conversion(x: int, c: int, theta: float): @@ -29,62 +28,69 @@ def distance_estimation(angles: Tuple[float, float], h: float) -> Tuple[float, f def main(): - vid = cv2.VideoCapture(0) - - p = (1, 1) - target = 0.0775 - differential = 0.0001 - - while True: - ret, frame = vid.read() - if not ret: - print("Failed to capture frame") - break - - l, r, m = 0, math.pi, 0 - - while (r - l) > differential: - m = (l + r) / 2 - - m_angles = bounding_box_angle( - [p] * 4, - CAMERA_DIMENSIONS_PIXELS, (m, CAMERA_FOV_RADIANS[1]))[0] - - distances = distance_estimation(m_angles, CAMERA_HEIGHT_ABOVE_GROUND_METERS) - - distance = distances[0] - - if distance > target: - r = m - differential - elif distance < target: - l = m + differential - else: - break - - # Convert distance to string for display - distance_str = "Distance: {:.2f} meters".format(distance) - - # Convert pixel measurements to string for display - pixel_measure_str = "Pixel measurement: {} pixels".format(p) - - # Update text on the frame - cv2.putText(frame, distance_str, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) - cv2.putText(frame, pixel_measure_str, (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) - - # Display the frame - cv2.imshow('Webcam Feed', frame) - - # Wait for a small amount of time (1 millisecond) - # If 'q' is pressed, exit the loop - if cv2.waitKey(1) & 0xFF == ord('q'): - break - - # Read the next frame - ret, frame = vid.read() - - # Release the video capture object and close all windows - vid.release() - cv2.destroyAllWindows() + res1 = bounding_box_angle([(34, 346)], CAMERA_DIMENSIONS_PIXELS, CAMERA_FOV_RADIANS) + print(res1) + res2 = distance_estimation(res1[0], CAMERA_HEIGHT_ABOVE_GROUND_METERS) + print(res2) + + + + # vid = cv2.VideoCapture(0) + # + # p = (1, 1) + # target = 0.0775 + # differential = 0.0001 + # + # while True: + # ret, frame = vid.read() + # if not ret: + # print("Failed to capture frame") + # break + # + # l, r, m = 0, math.pi, 0 + # + # while (r - l) > differential: + # m = (l + r) / 2 + # + # m_angles = bounding_box_angle( + # [p] * 4, + # CAMERA_DIMENSIONS_PIXELS, (m, CAMERA_FOV_RADIANS[1]))[0] + # + # distances = distance_estimation(m_angles, CAMERA_HEIGHT_ABOVE_GROUND_METERS) + # + # distance = distances[0] + # + # if distance > target: + # r = m - differential + # elif distance < target: + # l = m + differential + # else: + # break + # + # # Convert distance to string for display + # distance_str = "Distance: {:.2f} meters".format(distance) + # + # # Convert pixel measurements to string for display + # pixel_measure_str = "Pixel measurement: {} pixels".format(p) + # + # # Update text on the frame + # cv2.putText(frame, distance_str, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) + # cv2.putText(frame, pixel_measure_str, (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) + # + # # Display the frame + # cv2.imshow('Webcam Feed', frame) + # + # # Wait for a small amount of time (1 millisecond) + # # If 'q' is pressed, exit the loop + # if cv2.waitKey(1) & 0xFF == ord('q'): + # break + # + # # Read the next frame + # ret, frame = vid.read() + # + # # Release the video capture object and close all windows + # vid.release() + # cv2.destroyAllWindows() if __name__ == "__main__": diff --git a/requirements.txt b/requirements.txt index 8153502..5578272 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,7 @@ -matplotlib +ultralytics opencv-python numpy -serial +pyserial +python-dotenv +pillow +requests