# ClipsGenerator/main.py at main · harshjdhv/ClipsGenerator · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
import os  # For file and directory operations, such as cleanup and path manipulation
import time  # For measuring execution time of processes
from Components.YoutubeDownloader import download_youtube_video  # Downloads YouTube videos given a URL
from Components.Edit import extractAudio, crop_video  # Extracts audio from video and crops video segments
from Components.Transcription import transcribeAudio  # Transcribes audio to text
from Components.LanguageTasks import GetHighlight, GetMultipleHighlights, estimate_clips_from_duration  # Analyzes transcribed text to identify highlight segments
from Components.FaceCrop import crop_to_vertical, combine_videos  # Converts video to vertical format and combines segments
from Components.ModelManager import model_manager  # Initialize models at startup for NLP and other tasks

def cleanup_intermediate_files():
    """Delete the pipeline's intermediate artifacts from the current directory.

    Fixed-name files are removed first (only if they exist), followed by
    every path matching the wildcard patterns below. Each deletion, and each
    failure to delete, is reported on stdout; a summary line is printed at
    the end. Failures never propagate to the caller.
    """
    import glob

    # Artifacts with a fixed name.
    fixed_names = (
        "Out.mp4",
        "croped.mp4",
        "DecOut.mp4",
        "audio.wav",
        "temp_audio.wav",
        "temp-audio.m4a",
        "temp_video_with_subtitles.mp4",
        "subtitles.srt",
        "Final.mp4",  # the root copy is dropped; finals are kept in output_videos
    )

    # Artifacts whose names vary (per-clip files, downloaded streams).
    wildcard_patterns = (
        "Out_*.mp4",      # per-clip cuts
        "croped_*.mp4",   # per-clip vertical crops
        "Final_*.mp4",    # per-clip finals left in the root
        "video_*",        # downloaded video streams
        "audio_*",        # downloaded audio streams
        "*.webm",         # webm downloads
    )

    def _discard(path):
        """Try to delete *path*, report the outcome, and return True on success."""
        try:
            os.remove(path)
            print(f"🧹 Cleaned up: {path}")
        except Exception as err:
            print(f"⚠️  Warning: Could not remove {path}: {err}")
            return False
        return True

    # True counts as 1, so summing the results yields the number of deletions.
    removed = sum(_discard(name) for name in fixed_names if os.path.exists(name))
    # Each pattern is globbed lazily, i.e. after earlier patterns were processed.
    removed += sum(
        _discard(match)
        for pattern in wildcard_patterns
        for match in glob.glob(pattern)
    )

    # Files under cache/ are intentionally left alone (kept for performance).

    if not removed:
        print("✅ No intermediate files to clean")
        return
    print(f"✅ Cleanup complete: Removed {removed} intermediate files")

def print_progress(step, message, total_steps=6):
    """Write one "[step/total_steps] message" progress line to stdout."""
    line = "[{}/{}] {}".format(step, total_steps, message)
    print(line)

def _clip_transcriptions(transcriptions, start, stop):
    """Select the transcript segments belonging to a clip and re-time them.

    Args:
        transcriptions: iterable of ``(text, seg_start, seg_end)`` tuples with
            times on the source video's timeline.
        start: clip start time on the source timeline.
        stop: clip end time on the source timeline.

    Returns:
        A list of ``(text, seg_start, seg_end)`` tuples whose times are
        relative to the clip start. Segments only partly inside the clip are
        trimmed to the clip and dropped if nothing remains.
    """
    clip_length = stop - start
    selected = []
    for text, seg_start, seg_end in transcriptions:
        if seg_start >= start and seg_end <= stop:
            # Entirely inside the clip: just shift onto the clip's timeline.
            selected.append((text, seg_start - start, seg_end - start))
        elif seg_start < stop and seg_end > start:
            # Straddles a clip boundary: shift, then trim to the clip.
            shifted_start = max(0, seg_start - start)
            shifted_end = min(clip_length, seg_end - start)
            if shifted_end > shifted_start:
                selected.append((text, shifted_start, shifted_end))
    return selected


def _ask_clip_count(estimated_clips):
    """Prompt for the number of clips and return it clamped to the offered range.

    The offered range is ``1..min(estimated_clips, 8)`` (never below 1) and
    the default is ``min(estimated_clips, 3)`` (never below 1). Empty or
    non-numeric input yields the default.

    Args:
        estimated_clips: result of ``estimate_clips_from_duration``; assumed
            to be a non-negative number — TODO confirm.
    """
    max_clips = max(1, min(estimated_clips, 8))
    default_clips = max(1, min(estimated_clips, 3))
    answer = input(f"\nHow many clips to generate? (1-{max_clips}, default={default_clips}): ").strip()
    if not answer:
        return default_clips
    try:
        requested = int(answer)
    except ValueError:
        return default_clips
    # Clamp to the range shown in the prompt.
    return max(1, min(requested, max_clips))


def _generate_multiple_clips(video_path, transcriptions, transcript_text,
                             frame_sampling, subtitle_style, use_gta_clip,
                             started_at):
    """Cut, crop and finalize several highlight clips from one source video.

    A failure while processing one clip is reported and does not stop the
    remaining clips. Intermediate files are cleaned up before returning.
    """
    # Imported here because only multi-clip mode needs the video duration.
    import moviepy.editor as mp

    with mp.VideoFileClip(video_path) as clip:
        duration_minutes = clip.duration / 60

    estimated_clips = estimate_clips_from_duration(duration_minutes)
    print(f"\n📊 Video duration: {duration_minutes:.1f} minutes")
    print(f"📊 Estimated clips possible: {estimated_clips}")

    num_clips = _ask_clip_count(estimated_clips)

    print_progress(4, f"Finding {num_clips} highlight segments...")
    clips = GetMultipleHighlights(transcript_text, num_clips)
    if not clips:
        print("❌ Could not find highlight segments")
        cleanup_intermediate_files()
        return

    print(f"\n✓ Found {len(clips)} highlights:")
    for number, (start, stop, content) in enumerate(clips, 1):
        print(f"   {number}. {start:.1f}s - {stop:.1f}s: {content[:50]}...")

    # Base 6 steps, plus 2 extra steps for every clip after the first.
    total_steps = 6 + (len(clips) - 1) * 2
    successful_clips = []

    for clip_idx, (start, stop, _content) in enumerate(clips):
        clip_number = clip_idx + 1
        try:
            print(f"\n--- Processing Clip {clip_number}/{len(clips)} ---")
            cut_path = f"Out_{clip_idx}.mp4"
            vertical_path = f"croped_{clip_idx}.mp4"

            crop_video(video_path, cut_path, start, stop)
            print_progress(5 + clip_idx * 2, f"Processing video {clip_number}...", total_steps)
            crop_to_vertical(cut_path, vertical_path, frame_sampling_rate=frame_sampling)

            clip_transcriptions = _clip_transcriptions(transcriptions, start, stop)

            print_progress(6 + clip_idx * 2, f"Finalizing clip {clip_number}...", total_steps)
            final_output_path = combine_videos(
                cut_path, vertical_path, f"Final_{clip_idx}.mp4",
                clip_transcriptions, subtitle_style, use_gta_clip,
            )

            if final_output_path:
                successful_clips.append((clip_number, final_output_path))
                print(f"   ✓ Clip {clip_number} completed: {final_output_path}")

            # Drop this clip's temporary files before starting the next one.
            for temp_file in (cut_path, vertical_path):
                if os.path.exists(temp_file):
                    os.remove(temp_file)
        except Exception as e:
            # Best effort: one bad clip must not abort the remaining ones.
            print(f"   ❌ Error processing clip {clip_number}: {e}")

    cleanup_intermediate_files()
    processing_time = time.time() - started_at

    if not successful_clips:
        print(f"\n❌ All clips failed! ({processing_time:.1f}s)")
        return

    print(f"\n🎉 Complete! Generated {len(successful_clips)} clips:")
    for clip_num, path in successful_clips:
        print(f"   📹 Clip {clip_num}: {path}")
    print(f"⏱️  Total processing time: {processing_time:.1f}s")


def _generate_single_clip(video_path, transcriptions, transcript_text,
                          frame_sampling, subtitle_style, use_gta_clip,
                          started_at):
    """Cut, crop and finalize a single highlight clip from the source video.

    Intermediate files are cleaned up before returning.
    """
    print_progress(4, "Finding highlight segment...")
    start, stop = GetHighlight(transcript_text)

    # A usable highlight needs a positive length. The previous check
    # (`start != 0 and stop != 0`) also rejected highlights beginning at 0s.
    # NOTE(review): presumably GetHighlight signals failure with (0, 0),
    # which this test still rejects — confirm against LanguageTasks.
    if not stop > start:
        print("❌ Could not find highlight segment")
        cleanup_intermediate_files()
        return

    print(f"   ✓ Highlight: {start:.1f}s - {stop:.1f}s")

    cut_path = "Out.mp4"
    vertical_path = "croped.mp4"
    crop_video(video_path, cut_path, start, stop)

    print_progress(5, "Processing video...")
    crop_to_vertical(cut_path, vertical_path, frame_sampling_rate=frame_sampling)

    clip_transcriptions = _clip_transcriptions(transcriptions, start, stop)

    print_progress(6, "Finalizing output...")
    final_output_path = combine_videos(
        cut_path, vertical_path, "Final.mp4",
        clip_transcriptions, subtitle_style, use_gta_clip,
    )

    cleanup_intermediate_files()
    processing_time = time.time() - started_at

    if final_output_path:
        print(f"\n🎉 Complete! Video saved: {final_output_path}")
        print(f"⏱️  Processing time: {processing_time:.1f}s")
    else:
        print(f"\n❌ Processing failed! ({processing_time:.1f}s)")


def main():
    """Run the interactive shorts-generation pipeline.

    Prompts on stdin for the optimization level, the GTA overlay option, the
    YouTube URL, single/multiple clip mode and the subtitle style, then:
    downloads the video, extracts and transcribes its audio, and hands over
    to single- or multi-clip generation. Each stage that returns nothing
    prints an error and ends the run; intermediate files are cleaned up on
    every exit path except a failed download.
    """
    print("🚀 AI YouTube Shorts Generator")
    print("=" * 40)

    # Optimization level -> frame sampling rate for the vertical crop.
    # NOTE(review): the menu labels option 2 as the default while the prompt
    # says Enter selects quality; both map to rate 1, so the result is equal.
    print("\nOptimization settings:")
    print("1. Quality (slower, better quality)")
    print("2. Balanced (default)")
    print("3. Speed (faster, good quality)")
    opt_choice = input("Choose optimization level (1-3) or press Enter for quality: ").strip()
    frame_sampling_map = {'1': 1, '2': 1, '3': 2}  # speed mode: every 2nd frame
    frame_sampling = frame_sampling_map.get(opt_choice, 1)

    # GTA overlay: on unless the user explicitly answers 'n'.
    print("\nGTA Clip Integration:")
    print("Add GTA gameplay footage to bottom half of video?")
    gta_choice = input("Enter 'y' for yes or 'n' for no (default=y): ").strip().lower()
    use_gta_clip = gta_choice != 'n'

    url = input("\nEnter YouTube video URL: ")

    # Multi-clip mode: off unless the user explicitly answers 'y'.
    print("\nMultiple clips generation:")
    multi_choice = input("Generate multiple short videos? (y/n, default=n): ").strip().lower()
    generate_multiple = multi_choice == 'y'

    print("\nChoose subtitle style:")
    print("1. Modern (white text with black outline)")
    print("2. TikTok (yellow text, uppercase)")
    print("3. Instagram (white text with gray outline)")
    style_choice = input("Enter choice (1-3) or press Enter for modern: ").strip()
    style_map = {
        '1': 'modern',
        '2': 'tiktok',
        '3': 'instagram',
    }
    subtitle_style = style_map.get(style_choice, 'modern')

    print(f"\n✓ Config: {subtitle_style} style, sampling={frame_sampling}, GTA={'on' if use_gta_clip else 'off'}")
    print("=" * 40)
    total_start_time = time.time()

    print_progress(1, "Downloading video...")
    video_path = download_youtube_video(url)
    if not video_path:
        print("❌ Video download failed")
        return
    # NOTE(review): only the path string is changed here; presumably the
    # downloader leaves an .mp4 at this location — confirm.
    video_path = video_path.replace(".webm", ".mp4")

    print_progress(2, "Extracting audio...")
    audio_path = extractAudio(video_path)
    if not audio_path:
        print("❌ Audio extraction failed")
        cleanup_intermediate_files()
        return

    print_progress(3, "Transcribing audio...")
    transcriptions = transcribeAudio(audio_path)
    if not transcriptions:
        print("❌ No transcriptions found")
        cleanup_intermediate_files()
        return

    # One "start - end: text" line per segment. The newline keeps a segment's
    # text from running into the next segment's timestamp.
    transcript_text = "".join(
        f"{start} - {end}: {text}\n" for text, start, end in transcriptions
    )

    generate = _generate_multiple_clips if generate_multiple else _generate_single_clip
    generate(
        video_path, transcriptions, transcript_text,
        frame_sampling, subtitle_style, use_gta_clip, total_start_time,
    )

# Start the interactive pipeline only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()