Skip to content

Commit add9d7d

Browse files
authored
Merge pull request #1 from emre570/emre570-yt-summarizer
YouTube Summarizer
2 parents 961b78d + af677b2 commit add9d7d

File tree

3 files changed

+71
-11
lines changed

3 files changed

+71
-11
lines changed

summarizer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def setup_summarization_chain():
4545
input_variables=["text"],
4646
)
4747

48-
llm = ChatOllama(model="llama3", base_url="http://0.0.0.0:11434")
48+
llm = ChatOllama(model="llama3", base_url="http://127.0.0.1:11434")
4949
llm_chain = LLMChain(llm=llm, prompt=prompt_template)
5050
return llm_chain
5151

webui.py

+12-10
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,34 @@
11
import gradio as gr
22

33
from summarizer import load_document, setup_summarization_chain
4+
from yt_summarizer import summarize_video, check_link
45
from translator import setup_translator_chain
56

6-
77
def summarize(url):
    """Summarize the content behind *url* and reveal the translate button.

    YouTube links (as recognised by ``check_link``) are routed to
    ``summarize_video``; every other URL goes through the web-page path
    (``load_document`` + ``setup_summarization_chain``).

    Returns a two-item list: the summary, and a visible "Translate" button,
    matching the ``outputs`` wired to the generate button.
    """
    if not check_link(url):
        # Regular web page: load it, then run the page summarization chain.
        page_docs = load_document(url)
        result = setup_summarization_chain().run(page_docs)
    else:
        result = summarize_video(url)

    translate_button = gr.Button("🇹🇷 Translate ", visible=True)
    return [result, translate_button]
1316

14-
1517
def translate(text):
    """Run *text* through the translator chain and return its output."""
    return setup_translator_chain().run(text)
1921

20-
2122
with gr.Blocks() as demo:
2223
gr.Markdown(
23-
"""# Cobanov Web Summarizer
24-
Easily summarize any web page with a single click."""
24+
"""# Cobanov Web and Video Summarizer
25+
Easily summarize any web page or YouTube video with a single click."""
2526
)
2627

2728
with gr.Row():
2829
with gr.Column():
2930
url = gr.Text(label="URL", placeholder="Enter URL here")
31+
3032
btn_generate = gr.Button("Generate")
3133

3234
summary = gr.Markdown(label="Summary")
@@ -36,6 +38,7 @@ def translate(text):
3638
[
3739
"https://cobanov.dev/haftalik-bulten/hafta-13",
3840
"https://bawolf.substack.com/p/embeddings-are-a-good-starting-point",
41+
"https://www.youtube.com/watch?v=4pOpQwiUVXc",
3942
],
4043
inputs=[url],
4144
)
@@ -51,5 +54,4 @@ def translate(text):
5154
btn_generate.click(summarize, inputs=[url], outputs=[summary, btn_translate])
5255
btn_translate.click(translate, inputs=[summary], outputs=[summary])
5356

54-
55-
demo.launch()
57+
demo.launch()

yt_summarizer.py

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
from langchain_community.document_loaders import YoutubeLoader
2+
from langchain.text_splitter import TokenTextSplitter
3+
from langchain_community.chat_models import ChatOllama
4+
from langchain.chains.summarize import load_summarize_chain
5+
from langchain_core.prompts import PromptTemplate
6+
import re
7+
8+
# Compiled once at import time; matched from the start of the string only.
_YT_URL_RE = re.compile(
    r"(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+"
)


def check_link(link):
    """Return True if *link* looks like a YouTube video URL.

    Accepted forms are ``youtube.com/watch?v=<id>`` and ``youtu.be/<id>``,
    each with an optional ``http(s)://`` scheme and optional ``www.`` prefix.
    Only the beginning of the string is checked, so trailing query
    parameters after the video id are allowed.

    Non-string input (for example ``None``) is reported as not a YouTube
    link instead of raising ``TypeError`` from the regex engine.
    """
    if not isinstance(link, str):
        return False
    return _YT_URL_RE.match(link) is not None
11+
12+
def get_transcript(video_link):
    """Load the transcript for a YouTube video.

    Returns whatever ``YoutubeLoader.load()`` produces for a link accepted
    by ``check_link``. For any other input the string
    ``"Invalid YouTube URL."`` is returned instead, so callers must check
    the result type before treating it as documents.
    """
    if not check_link(video_link):
        return "Invalid YouTube URL."

    # Transcript languages requested from the loader ("en", then "en-US").
    yt_loader = YoutubeLoader.from_youtube_url(video_link, language=["en", "en-US"])
    return yt_loader.load()
19+
20+
def split_chunks(transcript, chunk_size=7500, chunk_overlap=100):
    """Split transcript documents into token-bounded chunks.

    Args:
        transcript: Documents to split (passed to
            ``TokenTextSplitter.split_documents``).
        chunk_size: Maximum tokens per chunk. The default of 7500 leaves
            headroom below Llama 3's 8192-token input limit.
        chunk_overlap: Tokens shared between consecutive chunks.

    Returns:
        The list of chunk documents produced by the splitter.
    """
    # NOTE(review): the splitter counts tokens with its own tokenizer, which
    # may not match the model's — confirm the headroom is sufficient.
    splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return splitter.split_documents(transcript)
26+
27+
def yt_summarization_chain():
    """Build the summarization chain used for YouTube transcripts.

    The chain pairs a ``ChatOllama`` client for the ``llama3`` model with a
    video-specific prompt whose single input variable is ``text``.
    """
    # Prompt body kept verbatim: it is sent to the model as-is.
    template_text = """As a professional summarizer specialized in video content, create a detailed and comprehensive summary of the YouTube video transcript provided. While crafting your summary, adhere to these guidelines:
1. Capture the essence of the video, focusing on main ideas and key details. Ensure the summary is in-depth and insightful, reflecting any narrative or instructional elements present in the video.

2. Exclude any redundant expressions and non-critical details to enhance the clarity and conciseness of the summary.

3. Base the summary strictly on the transcript provided, avoiding assumptions or additions from external sources.

4. Present the summary in a well-structured paragraph form, making it easy to read and understand.

5. Conclude with "[End of Notes, Message #X]", where "X" is the sequence number of the summarizing request, to indicate the completion of the task.

By adhering to this optimized prompt, you are expected to produce a clear, detailed, and audience-friendly summary that effectively conveys the core content and themes of the YouTube video.

"{text}"

DETAILED SUMMARY:"""
    video_prompt = PromptTemplate(template=template_text, input_variables=["text"])

    chat_model = ChatOllama(model="llama3")
    # NOTE(review): no chain_type is passed, so the library default is used;
    # confirm that default is appropriate when the transcript spans several
    # chunks.
    return load_summarize_chain(llm=chat_model, prompt=video_prompt, verbose=True)
50+
51+
def summarize_video(video_link):
    """Summarize the YouTube video at *video_link*.

    Fetches the transcript, splits it into chunks and runs the YouTube
    summarization chain over them.

    Returns:
        The summary text from the chain, or a short message string when the
        link is rejected or no transcript content is available.
    """
    transcript = get_transcript(video_link)

    # get_transcript reports a rejected link by returning a message string
    # rather than documents; hand it back instead of feeding it to the
    # document splitter, which expects document objects.
    if isinstance(transcript, str):
        return transcript

    chunks = split_chunks(transcript)
    if not chunks:
        # Nothing to summarize; avoid prompting the model with empty input.
        return "No transcript available for this video."

    sum_chain = yt_summarization_chain()
    return sum_chain.run(chunks)

0 commit comments

Comments
 (0)