-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
221 lines (187 loc) · 6.89 KB
/
main.py
File metadata and controls
221 lines (187 loc) · 6.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import logging
import os
import re
from urllib.parse import parse_qs, unquote_plus, urlencode, urlparse, urlunparse

import discord
from discord import app_commands
from discord.ext import commands
from dotenv import load_dotenv
# Pull variables from a local .env file into the process environment so the
# bot token can be looked up with os.getenv() later on.
load_dotenv()

# Emit INFO-and-above records to both the bot.log file and a stream handler.
logging.basicConfig(
    handlers=[logging.FileHandler("bot.log"), logging.StreamHandler()],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
class LinkCleaner(commands.Bot):
    """Discord bot that strips tracking query parameters from links.

    Links are cleaned in two ways: automatically for every non-bot message
    (``on_message``) and on demand through the "Clean URL" message context
    menu registered in ``setup_hook``.
    """

    # Limits documented by Discord; exceeding them makes the API (or
    # ``View.add_item``) reject the whole response.
    _MAX_BUTTONS = 25
    _MAX_LABEL_LENGTH = 80
    _MAX_BUTTON_URL_LENGTH = 512
    _MAX_MESSAGE_LENGTH = 2000

    # Characters trimmed from the end of a matched URL: sentence punctuation
    # plus ">" left over from Discord's "<url>" embed-suppression syntax.
    _TRAILING_CHARS = ".,!?)>"

    def __init__(self, intents):
        """Create the bot with the "!" command prefix and the given intents."""
        super().__init__(command_prefix="!", intents=intents)
        # Comprehensive list of tracking parameters to remove
        self.tracking_params = [
            # UTM parameters
            "utm_source",
            "utm_medium",
            "utm_campaign",
            "utm_term",
            "utm_content",
            # Social media and ad platform tracking
            "fbclid",
            "gclid",
            "ref",
            "source",
            "tk",
            # Affiliate and click tracking
            "aff_id",
            "aff_sub",
            "aff_click_id",
            "click_id",
            # Campaign and ad tracking
            "campaign_id",
            "ad_id",
            "placement_id",
            "creative_id",
            "network_id",
            # Referrer and session tracking
            "utm_referrer",
            "referrer",
            "sref",
            "referer",
            "track_id",
            "tag",
            "subid",
            "subid2",
            "subid3",
            "rurl",
            "sid",
            "dclid",
            "twclid",
            "igshid",
            "igsh",
        ]
        # Regex pattern to find URLs in text
        self.url_pattern = re.compile(r"https?://\S+")

    def is_valid_url(self, url):
        """Return True when *url* parses with both a scheme and a host part."""
        try:
            result = urlparse(url)
        except ValueError:
            return False
        return bool(result.scheme and result.netloc)

    def clean_url(self, url):
        """Remove tracking parameters from a URL, but skip Discord media URLs.

        The query string is filtered pair by pair instead of being decoded and
        re-encoded, so the remaining parameters keep their original order,
        encoding and blank values. When no tracking parameter is present the
        input is returned unchanged, which lets callers detect "nothing to
        clean" with a plain equality check.

        Args:
            url: The URL to clean.

        Returns:
            The URL without tracking parameters, or *url* itself when it is a
            Discord media URL, has nothing to remove, or cannot be parsed.
        """
        try:
            parsed_url = urlparse(url)
        except ValueError as e:
            # Best effort: an unparseable URL is passed through untouched.
            logging.error("Error cleaning URL %s: %s", url, e)
            return url

        # Skip cleaning for Discord media URLs
        if parsed_url.netloc.lower() == "media.discordapp.net":
            return url
        if not parsed_url.query:
            return url

        kept_pairs = []
        removed_any = False
        for pair in parsed_url.query.split("&"):
            # Decode only the key for the comparison; the pair itself is kept
            # verbatim so the surviving parameters are not re-encoded.
            key = unquote_plus(pair.partition("=")[0])
            if key in self.tracking_params:
                removed_any = True
            else:
                kept_pairs.append(pair)

        if not removed_any:
            return url
        return urlunparse(parsed_url._replace(query="&".join(kept_pairs)))

    def _extract_cleaned_links(self, content):
        """Return the cleaned form of every URL in *content* that changed.

        Duplicates are dropped while the order of first appearance is kept.
        """
        cleaned_links = []
        for raw_url in self.url_pattern.findall(content):
            url = raw_url.rstrip(self._TRAILING_CHARS)
            if not self.is_valid_url(url):
                continue
            cleaned_url = self.clean_url(url)
            # Only keep URLs that actually changed
            if cleaned_url != url and cleaned_url not in cleaned_links:
                cleaned_links.append(cleaned_url)
        return cleaned_links

    def _build_link_view(self, links):
        """Build a view holding one link button per entry in *links*.

        Links that exceed the button URL limit are skipped and at most
        ``_MAX_BUTTONS`` buttons are added, so the view stays sendable.
        """
        view = discord.ui.View()
        for link in links:
            if len(view.children) >= self._MAX_BUTTONS:
                break
            if len(link) > self._MAX_BUTTON_URL_LENGTH:
                continue
            label = f"Open {urlparse(link).netloc}"[: self._MAX_LABEL_LENGTH]
            view.add_item(discord.ui.Button(label=label, url=link))
        return view

    def _format_links_text(self, links):
        """Return the "Cleaned links:" text, cut off at the message limit."""
        lines = ["Cleaned links:"]
        used = len(lines[0])
        for link in links:
            # +1 accounts for the newline that joins this link to the text.
            if used + 1 + len(link) > self._MAX_MESSAGE_LENGTH:
                break
            lines.append(link)
            used += 1 + len(link)
        return "\n".join(lines)

    async def on_message(self, message):
        """Reply to non-bot messages with cleaned versions of their links."""
        if message.author.bot:
            return

        cleaned_links = self._extract_cleaned_links(message.content)
        if cleaned_links:
            view = self._build_link_view(cleaned_links)
            # The reply carries the links only as buttons, so it is pointless
            # when no link fit into one.
            if view.children:
                try:
                    await message.reply(
                        "Here are the cleaned links without tracking:",
                        mention_author=False,
                        view=view,
                    )
                except discord.HTTPException as exc:
                    # A failed reply (e.g. missing permission) must not stop
                    # command processing below.
                    logging.warning("Could not reply with cleaned links: %s", exc)

        # Process commands if needed
        await self.process_commands(message)

    async def setup_hook(self):
        """Set up the bot, including adding context menu commands"""

        # Add URL cleaning context menu command
        @self.tree.context_menu(name="Clean URL")
        @app_commands.allowed_installs(guilds=True, users=True)
        @app_commands.allowed_contexts(guilds=True, dms=True, private_channels=True)
        async def clean_url_context_menu(
            interaction: discord.Interaction, message: discord.Message
        ):
            # Distinguish "no links at all" from "links, but nothing to clean"
            if not self.url_pattern.search(message.content):
                await interaction.response.send_message(
                    "No URLs found in the message.", ephemeral=True
                )
                return

            cleaned_links = self._extract_cleaned_links(message.content)
            if not cleaned_links:
                await interaction.response.send_message(
                    "No tracking parameters found to clean.", ephemeral=True
                )
                return

            # Send the response with cleaned links and buttons
            await interaction.response.send_message(
                self._format_links_text(cleaned_links),
                view=self._build_link_view(cleaned_links),
                ephemeral=True,
            )

        # Sync application commands
        await self.tree.sync()
        logging.info("Bot is ready and commands have been synced.")
def main():
    """Build the bot with message-content access and start it.

    The token is read from the DISCORD_BOT_TOKEN environment variable. When
    it is missing or empty, an error is logged and the bot is not started.
    """
    bot_intents = discord.Intents.default()
    bot_intents.message_content = True
    client = LinkCleaner(bot_intents)

    bot_token = os.getenv("DISCORD_BOT_TOKEN")
    if bot_token:
        # Run the bot
        client.run(bot_token)
    else:
        logging.error("DISCORD_BOT_TOKEN not found in environment variables.")
# Start the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()