# video.py
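"""Render an AI News Pod episode video: composite a DALL·E cover image and
timed captions over the combined dialogue audio.

Assumes main.py has already produced combined_dialogue.mp3 and
dialogue_transcript.json in the working directory (see the check in main()).
"""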
import os
import json
import uuid
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime

import requests
from moviepy.editor import AudioFileClip, CompositeVideoClip, ImageClip, TextClip
from moviepy.video.fx.all import resize
from openai import OpenAI
from PIL import Image

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
openai_client = OpenAI(api_key=OPENAI_API_KEY)


def log(category, message):
    """Print a timestamped message and append it to video_generation.log."""
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    log_message = f"[{timestamp}] [{category.upper()}] {message}"
    print(log_message)
    with open("video_generation.log", "a") as log_file:
        log_file.write(log_message + "\n")
def generate_default_image(temp_folder: str, prompt="A modern and sleek AI-themed image for AI News Pod cover art.") -> str:
    """Generate cover art with DALL·E 3 and save it into temp_folder."""
    log("IMAGE_GEN", "Generating default image with DALL·E...")
    response = openai_client.images.generate(
        model="dall-e-3",
        prompt=prompt,
        size="1024x1024",
        n=1,
    )
    image_url = response.data[0].url
    log("IMAGE_GEN", f"Image generated: {image_url}")

    # Download and save the image (the returned URL is only valid temporarily)
    image_response = requests.get(image_url)
    image_response.raise_for_status()
    image_path = os.path.join(temp_folder, "default_image.png")
    with open(image_path, "wb") as img_file:
        img_file.write(image_response.content)
    log("IMAGE_GEN", f"Image saved to: {image_path}")
    return image_path
def resize_image(image, newsize):
    # Helper for direct PIL resizing (not used by create_video below).
    # LANCZOS replaces Image.ANTIALIAS, which was removed in Pillow 10.
    return image.resize(newsize, Image.LANCZOS)
def create_text_clip(segment, video_width, video_height):
    """Build a positioned, timed caption clip for one (sub)segment."""
    # method='caption' wraps the text to the given width (requires ImageMagick).
    return TextClip(
        segment['text'],
        fontsize=segment.get('font_size', 48),
        font='Arial-Bold',
        color='white',
        bg_color='black',
        stroke_color='black',
        stroke_width=2,
        method='caption',
        size=(int(video_width * 0.9), None)  # Dynamic height based on text
    ).set_position(segment.get('position', ('center', video_height - video_height // 3))) \
     .set_start(segment['start_time']) \
     .set_duration(segment['duration'])
def split_segment(segment, video_height, num_subsegments=4):
    """Split a segment into evenly timed sub-captions so less text is on screen at once."""
    words = segment['text'].split()
    avg_words = max(1, len(words) // num_subsegments)
    sub_duration = segment['duration'] / num_subsegments
    subsegments = []
    for sub_i in range(num_subsegments):
        start_word = sub_i * avg_words
        # The last subsegment takes all remaining words so none are dropped.
        end_word = len(words) if sub_i == num_subsegments - 1 else start_word + avg_words
        sub_text = ' '.join(words[start_word:end_word])
        if not sub_text:
            continue  # Fewer words than subsegments; skip empty captions.
        sub_start = segment['start_time'] + sub_i * sub_duration
        subsegments.append({
            'text': sub_text,
            'start_time': sub_start,
            'duration': sub_duration,
            'font_size': segment.get('font_size', 48),
            'position': segment.get('position', ('center', video_height - video_height // 3))
        })
    return subsegments
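# Example (illustrative values): a 12-second segment starting at t=3.0 is split
# into four 3-second sub-captions starting at 3.0, 6.0, 9.0, and 12.0 seconds:
#   split_segment({'text': 'the quick brown fox jumps over the lazy dog today',
#                  'start_time': 3.0, 'duration': 12.0}, video_height=1080)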
def create_video(audio_file: str, image_path: str, transcript_path: str) -> str:
    log("VIDEO_GEN", "Creating video from audio and image...")

    # Load audio and image; the still image spans the full audio duration.
    audio = AudioFileClip(audio_file)
    image = ImageClip(image_path).set_duration(audio.duration)

    # Set a fixed fps for the video
    fps = 24

    # Resize the image
    image_resized = resize(image, height=1080).set_position("center")

    # Generate captions
    log("CAPTION_GEN", "Generating captions from transcript...")
    with open(transcript_path, "r") as f:
        transcript = json.load(f)

    # Calculate dimensions for the lower third
    video_width, video_height = image_resized.size

    # Split each segment into shorter sub-captions, then render the TextClips
    # in parallel (each render shells out to ImageMagick).
    split_segments = []
    for segment in transcript:
        split_segments.extend(split_segment(segment, video_height=video_height))

    # Ensure split_segments are sorted by start_time
    split_segments.sort(key=lambda s: s['start_time'])

    with ThreadPoolExecutor() as executor:
        text_clips = list(executor.map(
            lambda segment: create_text_clip(segment, video_width, video_height),
            split_segments
        ))

    # Combine all text clips into a single captions layer
    captions = CompositeVideoClip(text_clips, size=image_resized.size).set_duration(audio.duration)

    # Combine image and captions
    video = CompositeVideoClip([image_resized, captions]).set_audio(audio)

    # Export video with speed-oriented settings (ultrafast preset, CRF 28).
    video_path = "combined_video.mp4"
    video.write_videofile(
        video_path,
        fps=fps,
        codec="libx264",
        audio_codec="aac",
        threads=4,
        preset="ultrafast",
        ffmpeg_params=["-crf", "28"]
    )
    log("VIDEO_GEN", f"Video created: {video_path}")
    return video_path
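# The transcript is assumed (from the key lookups above) to be a JSON list of
# segment objects; 'font_size' and 'position' are optional. Values here are
# illustrative:
# [
#   {"text": "Welcome back to AI News Pod.", "start_time": 0.0, "duration": 4.2},
#   {"text": "Today's top story ...", "start_time": 4.2, "duration": 6.8}
# ]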
def main():
    log("PROCESS_START", "Starting the video generation process...")

    # Check if required files exist
    if not os.path.exists("combined_dialogue.mp3") or not os.path.exists("dialogue_transcript.json"):
        log("ERROR", "Required files (combined_dialogue.mp3 or dialogue_transcript.json) not found. Run main.py first.")
        return

    temp_folder = f"video_{datetime.now().strftime('%Y-%m-%d_%H-%M')}_{uuid.uuid4()}"
    os.makedirs(temp_folder, exist_ok=True)
    log("TEMP_FOLDER", f"Created temporary folder: {temp_folder}")

    # Generate an image prompt based on the transcript
    with open("dialogue_transcript.json", "r") as f:
        transcript = json.load(f)

    # Use GPT-4o-mini to generate an image prompt
    prompt_response = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are an AI assistant that generates image prompts based on text."},
            {"role": "user", "content": f"Generate a detailed image prompt for AI News Pod cover art based on this transcript. Include an Indian male host and a blonde female host, and the words 'AI News Pod'. Transcript: {json.dumps(transcript)}"}
        ]
    )
    prompt = prompt_response.choices[0].message.content
    log("IMAGE_PROMPT", f"Generated image prompt: {prompt}")

    # Generate the image using the prompt
    image_path = generate_default_image(temp_folder, prompt=prompt)
    video_path = create_video("combined_dialogue.mp3", image_path, "dialogue_transcript.json")

    log("OUTPUT", f"Final video saved as: {video_path}")
    log("PROCESS_END", "Video generation process completed successfully!")


if __name__ == "__main__":
    main()