The dataset for this project comes from Kaggle; it contains the metadata for the TV shows and movies on Netflix. The project simulates real-time streaming of movie details using Kafka. We used several technologies: Python, Amazon EC2, Apache Kafka, AWS Glue, Amazon Athena, and SQL.
![Screen Shot 2024-03-26 at 1 09 49 PM](s3://crabby-images/8ae77/8ae7720075c12b8efcba574280fa104bb7970784)
![Screen Shot 2024-03-26 at 1 13 31 PM](s3://crabby-images/24737/24737be57486d3c7d8d87dc99c28c17f5973e007)
wget https://archive.apache.org/dist/kafka/3.7.0/kafka_2.13-3.7.0.tgz
tar -xvf kafka_2.13-3.7.0.tgz
cd kafka_2.13-3.7.0
sudo yum install java-1.8.0
java -version
sudo nano config/server.properties
bin/zookeeper-server-start.sh config/zookeeper.properties
export KAFKA_HEAP_OPTS="-Xmx256M -Xms128M"
cd kafka_2.13-3.7.0
bin/kafka-server-start.sh config/server.properties
bin/kafka-topics.sh --create --topic netflix_data --bootstrap-server {Put the Public IP of your EC2 Instance:9092} --replication-factor 1 --partitions 1
![Screen Shot 2024-03-26 at 1 24 34 PM](s3://crabby-images/bf88d/bf88d3412ba593e0a288147c610665fc2994d3f8)
cd kafka_2.13-3.7.0
bin/kafka-console-producer.sh --topic netflix_data --bootstrap-server {Put the Public IP of your EC2 Instance:9092}
cd kafka_2.13-3.7.0
bin/kafka-console-consumer.sh --topic netflix_data --bootstrap-server {Put the Public IP of your EC2 Instance:9092}
![Screen Shot 2024-03-26 at 1 27 22 PM](s3://crabby-images/7a5de/7a5deb3d1e113cb0762eeeca781c5bd288f4c53a)
![Screen Shot 2024-03-26 at 1 33 25 PM](s3://crabby-images/206a2/206a24ab0d9a2bfbbb867b0f1dbc82badc25bc54)
![Screen Shot 2024-03-26 at 1 34 43 PM](s3://crabby-images/d100b/d100befa724e1ab38304b61e315002a159a4feb5)
![Screen Shot 2024-03-26 at 1 33 16 PM](s3://crabby-images/8de13/8de13030b3f6462ce00f00cb225882117d5f82bf)
![Screen Shot 2024-03-26 at 2 12 40 PM](s3://crabby-images/1e420/1e42076150924fadbaee2beb71953ce6998d1459)
![Screen Shot 2024-03-26 at 1 44 05 PM](s3://crabby-images/87582/87582fe8697c4967f2dbd3d9a3f2cc33edbd9be0)
![Screen Shot 2024-03-26 at 1 41 47 PM](s3://crabby-images/e3eea/e3eea3ff5ccce1026a168581797d4e59fe47345e)
![Screen Shot 2024-03-26 at 1 44 48 PM](s3://crabby-images/2f2b5/2f2b518d2607229a809e4e0ea06b1905f70d233c)
![Screen Shot 2024-03-26 at 1 46 33 PM](s3://crabby-images/7928d/7928d6707c83aa0bc1b8f44eb37107e302c62faa)
![Screen Shot 2024-03-26 at 1 47 52 PM](s3://crabby-images/276bd/276bd4053f22707f43e2d70941d0f4877df468f3)
SELECT * FROM "netflix_movies_db"."gakas_kafka_netflix_data" WHERE release_year=2020;
![Screen Shot 2024-03-26 at 1 56 07 PM](s3://crabby-images/cea70/cea7073decd9b1c2cce3b354ca03823db80a6a49)
![Screen Shot 2024-03-26 at 1 56 16 PM](s3://crabby-images/eb873/eb87308424ff4ce6101f5e77f5f588e0d1ecda4a)
SELECT type, COUNT(*) FROM "netflix_movies_db"."gakas_kafka_netflix_data" GROUP BY type;
![Screen Shot 2024-03-26 at 2 04 23 PM](s3://crabby-images/35264/352641278703266876803d50b53ca2068f3aa695)