video_generator.py
import os
import random

from gtts import gTTS
from PIL import Image, ImageDraw, ImageFont
from pydub import AudioSegment
from moviepy.editor import AudioFileClip, ImageClip
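
# Environment notes (assumptions, not stated in the original script):
# - pydub and moviepy both rely on an ffmpeg binary being available on PATH
#   for MP3 decoding and video encoding.
# - gTTS needs an internet connection; it calls Google's text-to-speech service.
# - "from moviepy.editor import ..." is the moviepy 1.x import path; moviepy 2.x
#   exposes these classes from the top-level "moviepy" package instead.
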
def create_image_with_text(image_path):
    width, height = 720, 1280
    image = Image.new('RGB', (width, height), 'white')
    draw = ImageDraw.Draw(image)
    text = "Sample Text Overlay"
    font_size = 50
    try:
        # Load a TrueType font if available
        font = ImageFont.truetype("arial.ttf", font_size)
    except IOError:
        # Fall back to Pillow's built-in default font
        font = ImageFont.load_default()
    # Measure the rendered text (textbbox returns left, top, right, bottom)
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    text_width, text_height = right - left, bottom - top
    # Centre the text on the canvas
    text_x = (width - text_width) // 2
    text_y = (height - text_height) // 2
    # Draw text on the image
    draw.text((text_x, text_y), text, font=font, fill='black')
    image.save(image_path)

def create_video_from_image_and_audio(image_path, audio_path, output_path):
    # Load the generated image as a still-image clip
    image = ImageClip(image_path)
    # Load the audio file
    try:
        audio = AudioFileClip(audio_path)
    except Exception as e:
        print(f"Error loading audio file: {e}")
        return
    # Check that the audio file has a usable duration
    if audio.duration <= 0:
        print("Audio file has no duration.")
        return
    # Set the duration of the image to match the audio duration
    image = image.set_duration(audio.duration)
    # Attach the audio track to the image clip
    video = image.set_audio(audio)
    # Write the final video
    try:
        video.write_videofile(output_path, fps=24, codec='libx264', audio_codec='aac')
        print(f"Video saved as {output_path}")
    except Exception as e:
        print(f"Error creating video: {e}")

# Sample conversation with filler words and intentional silences
conversation = [
    "So, I was thinking we could go to the park later.",
    "But, umm, I’m not really sure if the weather will be good.",
    "You know, I’ve been, uh, really busy with work lately.",
    "Like, maybe we should plan for the weekend instead.",
    "Hmm, but I don't know if I have time, like, to go out.",
    "Anyway, let’s just see how things go."
]

# Create a list to hold audio segments
audio_segments = []

# Generate audio for each sentence and optionally add silence
for sentence in conversation:
    # Save the sentence as audio using gTTS
    tts = gTTS(text=sentence, lang='en')
    tts.save("sentence.mp3")
    # Load the sentence audio
    sentence_audio = AudioSegment.from_file("sentence.mp3")
    audio_segments.append(sentence_audio)
    # Randomly add a pause after the sentence
    if random.choice([True, False]):
        silence_duration = random.randint(1500, 4500)  # 1.5 to 4.5 seconds of silence
        silence = AudioSegment.silent(duration=silence_duration)
        audio_segments.append(silence)

# Combine all audio segments into one track
final_audio = sum(audio_segments)

# Save the final audio
audio_file_path = "sample_conversation_with_fillers_and_silence.mp3"
final_audio.export(audio_file_path, format="mp3")

# Clean up the temporary per-sentence file
os.remove("sentence.mp3")
print(f"Audio file saved as {audio_file_path}")

# Create the image and video
image_path = "generated_image.jpg"
create_image_with_text(image_path)
output_path = "input_video.mp4"
create_video_from_image_and_audio(image_path, audio_file_path, output_path)