diff --git a/classes/classes.py b/classes/classes.py index 64fc9ea..1a0eb45 100644 --- a/classes/classes.py +++ b/classes/classes.py @@ -4,8 +4,6 @@ class User: def __init__(self, pyrogram_user_obj): - _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' - self.id = pyrogram_user_obj.id self.is_bot = pyrogram_user_obj.is_bot self.first_name = "N.A." if pyrogram_user_obj.first_name is None else pyrogram_user_obj.first_name @@ -15,24 +13,29 @@ def __init__(self, pyrogram_user_obj): def to_string(self): return_string = "" - return_string = return_string + self.username + "§" return_string = return_string + self.first_name + " " + self.last_name + "§" return_string = return_string + self.phone_number - return return_string +def format_date(date_obj): + _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' + if date_obj is None: + return "" + if isinstance(date_obj, int) or isinstance(date_obj, float): + return datetime.utcfromtimestamp(date_obj).strftime(_TIME_FORMAT) + return date_obj.strftime(_TIME_FORMAT) + + class Photo: def __init__(self, pyrogram_photo_obj): - _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' - self.id = pyrogram_photo_obj.file_id self.width = pyrogram_photo_obj.width self.height = pyrogram_photo_obj.height self.size = pyrogram_photo_obj.file_size - self.date = datetime.utcfromtimestamp(pyrogram_photo_obj.date).strftime(_TIME_FORMAT) + self.date = format_date(pyrogram_photo_obj.date) self.ttl_seconds = "" if pyrogram_photo_obj.ttl_seconds is None else str(pyrogram_photo_obj.ttl_seconds) def to_string(self): @@ -47,21 +50,17 @@ def to_string(self): class Audio: def __init__(self, pyrogram_audio_obj): - _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' - self.id = pyrogram_audio_obj.file_id self.duration = pyrogram_audio_obj.duration self.file_name = "" if pyrogram_audio_obj.file_name is None else pyrogram_audio_obj.file_name self.mime_type = "" if pyrogram_audio_obj.mime_type is None else pyrogram_audio_obj.mime_type self.size = "" if pyrogram_audio_obj.file_size is None else str(pyrogram_audio_obj.file_size) - self.date = "" if pyrogram_audio_obj.date is None else \ - str(datetime.utcfromtimestamp(pyrogram_audio_obj.date).strftime(_TIME_FORMAT)) + self.date = format_date(pyrogram_audio_obj.date) self.performer = "" if pyrogram_audio_obj.performer is None else pyrogram_audio_obj.performer self.title = "" if pyrogram_audio_obj.title is None else pyrogram_audio_obj.title def to_string(self): return_string = "id = {}, duration = {}".format(self.id, self.duration) - # Optional fields return_string = return_string + ", File name = {}".format(self.file_name) \ if self.file_name != "" else return_string return_string = return_string + ", Mime type = {}".format(self.mime_type) \ @@ -80,18 +79,14 @@ def to_string(self): class Document: def __init__(self, pyrogram_doc_obj): - _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' - self.id = pyrogram_doc_obj.file_id self.file_name = "" if pyrogram_doc_obj.file_name is None else pyrogram_doc_obj.file_name self.mime_type = "" if pyrogram_doc_obj.mime_type is None else pyrogram_doc_obj.mime_type self.size = "" if pyrogram_doc_obj.file_size is None else str(pyrogram_doc_obj.file_size) - self.date = "" if pyrogram_doc_obj.date is None else \ - str(datetime.utcfromtimestamp(pyrogram_doc_obj.date).strftime(_TIME_FORMAT)) + self.date = format_date(pyrogram_doc_obj.date) def to_string(self): return_string = "id = {}".format(self.id) - # Optional fields return_string = return_string + ", File name = {}".format(self.file_name) \ if self.file_name != "" else return_string return_string = return_string + ", Mime type = {}".format(self.mime_type) \ @@ -106,8 +101,6 @@ def to_string(self): class Sticker: def __init__(self, pyrogram_sticker_obj): - _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' - self.id = pyrogram_sticker_obj.file_id self.width = pyrogram_sticker_obj.width self.height = pyrogram_sticker_obj.height @@ -115,15 +108,13 @@ def __init__(self, pyrogram_sticker_obj): self.file_name = "" if pyrogram_sticker_obj.file_name is None else pyrogram_sticker_obj.file_name self.mime_type = "" if pyrogram_sticker_obj.mime_type is None else pyrogram_sticker_obj.mime_type self.size = "" if pyrogram_sticker_obj.file_size is None else str(pyrogram_sticker_obj.file_size) - self.date = "" if pyrogram_sticker_obj.date is None else \ - str(datetime.utcfromtimestamp(pyrogram_sticker_obj.date).strftime(_TIME_FORMAT)) + self.date = format_date(pyrogram_sticker_obj.date) self.emoji = "" if pyrogram_sticker_obj.emoji is None else pyrogram_sticker_obj.emoji self.set_name = "" if pyrogram_sticker_obj.set_name is None else pyrogram_sticker_obj.set_name def to_string(self): return_string = "id = {}, width = {}, height = {}, is animated = {}".format(self.id, self.width, self.height, self.is_animated) - # Optional fields return_string = return_string + ", File name = {}".format(self.file_name) \ if self.file_name != "" else return_string return_string = return_string + ", Mime type = {}".format(self.mime_type) \ @@ -142,8 +133,6 @@ def to_string(self): class Animation: def __init__(self, pyrogram_animation_obj): - _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' - self.id = pyrogram_animation_obj.file_id self.width = pyrogram_animation_obj.width self.height = pyrogram_animation_obj.height @@ -151,13 +140,11 @@ def __init__(self, pyrogram_animation_obj): self.file_name = "" if pyrogram_animation_obj.file_name is None else pyrogram_animation_obj.file_name self.mime_type = "" if pyrogram_animation_obj.mime_type is None else pyrogram_animation_obj.mime_type self.size = "" if pyrogram_animation_obj.file_size is None else str(pyrogram_animation_obj.file_size) - self.date = "" if pyrogram_animation_obj.date is None else \ - str(datetime.utcfromtimestamp(pyrogram_animation_obj.date).strftime(_TIME_FORMAT)) + self.date = format_date(pyrogram_animation_obj.date) def to_string(self): return_string = "id = {}, width = {}, height = {}, duration = {}".format(self.id, self.width, self.height, self.duration) - # Optional fields return_string = return_string + ", File name = {}".format(self.file_name) \ if self.file_name != "" else return_string return_string = return_string + ", Mime type = {}".format(self.mime_type) \ @@ -172,8 +159,6 @@ def to_string(self): class Game: def __init__(self, pyrogram_game_obj): - _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' - self.id = pyrogram_game_obj.file_id self.title = pyrogram_game_obj.title self.short_name = pyrogram_game_obj.short_name @@ -185,7 +170,6 @@ def to_string(self): return_string = "id = {}, title = {}, short name = {}, " \ "description = {}, photo meta-data = {}".format(self.id, self.title, self.short_name, self.description, Photo(self.photo).to_string()) - # Optional fields return_string = return_string + ", Animation = {}".format(Animation(self.animation).to_string()) \ if self.animation != "" else return_string @@ -194,8 +178,6 @@ def to_string(self): class Video: def __init__(self, pyrogram_video_obj): - _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' - self.id = pyrogram_video_obj.file_id self.width = pyrogram_video_obj.width self.height = pyrogram_video_obj.height @@ -205,14 +187,12 @@ def __init__(self, pyrogram_video_obj): self.supports_streaming = "" if pyrogram_video_obj.supports_streaming is None \ else pyrogram_video_obj.supports_streaming self.size = "" if pyrogram_video_obj.file_size is None else str(pyrogram_video_obj.file_size) - self.date = "" if pyrogram_video_obj.date is None else \ - str(datetime.utcfromtimestamp(pyrogram_video_obj.date).strftime(_TIME_FORMAT)) + self.date = format_date(pyrogram_video_obj.date) self.ttl_seconds = "" if pyrogram_video_obj.ttl_seconds is None else pyrogram_video_obj.ttl_seconds def to_string(self): return_string = "id = {}, width = {}, height = {}, duration = {}".format(self.id, self.width, self.height, self.duration) - # Optional fields return_string = return_string + ", File name = {}".format(self.file_name) \ if self.file_name != "" else return_string return_string = return_string + ", Mime type = {}".format(self.mime_type) \ @@ -231,18 +211,14 @@ def to_string(self): class Voice: def __init__(self, pyrogram_voice_obj): - _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' - self.id = pyrogram_voice_obj.file_id self.duration = pyrogram_voice_obj.duration self.mime_type = "" if pyrogram_voice_obj.mime_type is None else pyrogram_voice_obj.mime_type self.size = "" if pyrogram_voice_obj.file_size is None else str(pyrogram_voice_obj.file_size) - self.date = "" if pyrogram_voice_obj.date is None else \ - str(datetime.utcfromtimestamp(pyrogram_voice_obj.date).strftime(_TIME_FORMAT)) + self.date = format_date(pyrogram_voice_obj.date) def to_string(self): return_string = "id = {}, duration = {}".format(self.id, self.duration) - # Optional fields return_string = return_string + ", Mime type = {}".format(self.mime_type) \ if self.mime_type != "" else return_string return_string = return_string + ", Size = {}".format(self.size) \ @@ -255,19 +231,15 @@ def to_string(self): class Videonote: def __init__(self, pyrogram_videonote_obj): - _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' - self.id = pyrogram_videonote_obj.file_id self.length = pyrogram_videonote_obj.length self.duration = pyrogram_videonote_obj.duration self.mime_type = "" if pyrogram_videonote_obj.mime_type is None else pyrogram_videonote_obj.mime_type self.size = "" if pyrogram_videonote_obj.file_size is None else str(pyrogram_videonote_obj.file_size) - self.date = "" if pyrogram_videonote_obj.date is None else \ - str(datetime.utcfromtimestamp(pyrogram_videonote_obj.date).strftime(_TIME_FORMAT)) + self.date = format_date(pyrogram_videonote_obj.date) def to_string(self): return_string = "id = {}, length = {}, duration = {}".format(self.id, self.length, self.duration) - # Optional fields return_string = return_string + ", Mime type = {}".format(self.mime_type) \ if self.mime_type != "" else return_string return_string = return_string + ", Size = {}".format(self.size) \ @@ -280,8 +252,6 @@ def to_string(self): class Contact: def __init__(self, pyrogram_contact_obj): - _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' - self.phone_number = pyrogram_contact_obj.phone_number self.first_name = pyrogram_contact_obj.first_name self.last_name = "" if pyrogram_contact_obj.last_name is None else pyrogram_contact_obj.last_name @@ -290,7 +260,6 @@ def __init__(self, pyrogram_contact_obj): def to_string(self): return_string = "Phone Number = {}, First name = {}".format(self.phone_number, self.first_name) - # Optional fields return_string = return_string + ", Last name = {}".format(self.last_name) \ if self.last_name != "" else return_string return_string = return_string + ", User Id = {}".format(self.user_id) \ @@ -314,8 +283,6 @@ def to_string(self): class Venue: def __init__(self, pyrogram_venue_obj): - _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' - self.longitude = pyrogram_venue_obj.longitude self.title = pyrogram_venue_obj.title self.address = pyrogram_venue_obj.address @@ -323,9 +290,7 @@ def __init__(self, pyrogram_venue_obj): self.foursquare_type = "" if pyrogram_venue_obj.foursquare_type is None else pyrogram_venue_obj.foursquare_type def to_string(self): - return_string = "Longitude = {}, Title = {}, Address = {}".format(self.longitude, self.title, self.address, - self.is_animated) - # Optional fields + return_string = "Longitude = {}, Title = {}, Address = {}".format(self.longitude, self.title, self.address) return_string = return_string + ", Foursquare id = {}".format(self.foursquare_id) \ if self.foursquare_id != "" else return_string return_string = return_string + ", Foursquare type = {}".format(self.foursquare_type) \ @@ -336,8 +301,6 @@ def to_string(self): class WebPage: def __init__(self, pyrogram_web_page_obj): - _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' - self.id = pyrogram_web_page_obj.file_id self.url = pyrogram_web_page_obj.url self.display_url = pyrogram_web_page_obj.display_url @@ -359,7 +322,6 @@ def __init__(self, pyrogram_web_page_obj): def to_string(self): return_string = "id = {}, URL = {}, Displayed URL = {}".format(self.id, self.url, self.display_url) - # Optional fields return_string = return_string + ", Type = {}".format(self.type) \ if self.type != "" else return_string return_string = return_string + ", Site name = {}".format(self.site_name) \ @@ -396,8 +358,6 @@ def to_string(self): class Poll: def __init__(self, pyrogram_poll_obj): - _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' - self.id = pyrogram_poll_obj.id self.question = pyrogram_poll_obj.question self.options = pyrogram_poll_obj.options @@ -413,7 +373,6 @@ def to_string(self): return_string = "Id = {}, Question = {}, Options = {}, " \ "Num of voters = {}, Is closed = {}".format(self.id, self.question, self.options, self.total_voter_count, self.is_closed) - # Optional fields return_string = return_string + ", Type = {}".format(self.type) \ if self.type != "" else return_string return_string = return_string + ", Is anonymous = {}".format(self.is_anonymous) \ @@ -428,8 +387,6 @@ def to_string(self): class Dice: def __init__(self, pyrogram_dice_obj): - _TIME_FORMAT = '%Y-%m-%d %H:%M:%S' - self.emoji = pyrogram_dice_obj.emoji self.value = pyrogram_dice_obj.value @@ -449,4 +406,3 @@ class BColor: ENDC = '\033[0m' BOLD = '\033[1m' UNDERLINE = '\033[4m' - diff --git a/telegramexporter.py b/telegramexporter.py index c00e425..7b45be1 100644 --- a/telegramexporter.py +++ b/telegramexporter.py @@ -9,6 +9,8 @@ import shutil import zipfile import hashlib +import configparser +import asyncio from colorama import init init() @@ -17,10 +19,8 @@ _ALL_CHATS_HEADER_STRING = "USERNAME§NAME§PHONE_NUMBER§TIMESTAMP§MESSAGE§DETAILS (OPTIONAL)" _OS_SEP = os.sep -# CURRENT EXTRACTION FOLDER _EXTRACTION_FOLDER = "" -# PATH USED FOR THE EXTRACTION OF CHATS, MEDIA AND MEMBERS _CHATS = "chats" _DOWNLOAD_MEDIA_PATH = "media" _MEMBERS_FILE_SUFFIX = "members" @@ -29,16 +29,16 @@ _MEDIA_PATH = "" _MEMBERS_PATH = "" -# EXTRACTION ZIP AND HASH FILE _EXTRACTION_ZIP = "" _FILE_HASH = "" +# Wrapper to make input() non-blocking for Pyrogram keepalives +async def async_input(prompt): + return await asyncio.to_thread(input, prompt) def update_folders(): - # DATETIME FOR EXTRACTION _extraction_date = datetime.now().strftime("%d-%m-%Y %H-%M-%S") - # CURRENT EXTRACTION FOLDER global _EXTRACTION_FOLDER _EXTRACTION_FOLDER = "extraction" + _OS_SEP + "Extraction_" + _extraction_date @@ -51,7 +51,6 @@ def update_folders(): global _MEMBERS_PATH _MEMBERS_PATH = _EXTRACTION_FOLDER + _OS_SEP + _MEMBERS_FILE_SUFFIX - # EXTRACTION ZIP AND HASH FILE global _EXTRACTION_ZIP _EXTRACTION_ZIP = _EXTRACTION_FOLDER + _OS_SEP + "extraction.zip" @@ -59,67 +58,56 @@ def update_folders(): _FILE_HASH = _EXTRACTION_FOLDER + _OS_SEP + "extraction_archive_hash.txt" -# Get the all messages in the chat with a given user -def get_chat_logs_by_identifier(client_instance, chat_identifier, directory_name): - """ - Iterates over all messages retrieved by the chat and generates the related logs; - if medias are found, it downloads them; - generates the list with data associated to chat partecipants - Args: - client_instance: Pyrogram Client, the main means for interacting with Telegram. - chat_identifier: the ID of the chat (username or chat_id) - directory_name: name of the directory into which create the logs and download medias - - Returns: - formatted_log: list with chat logs (each element of the list is a chat log) - partecipants_ids: list with the ids associated to the partecipants of the chat - """ +async def get_chat_logs_by_identifier(client_instance, chat_identifier, directory_name): partecipants_ids = list() try: - for members in client_instance.iter_chat_members(chat_identifier): + # Changed iter_chat_members to get_chat_members (async) + async for members in client_instance.get_chat_members(chat_identifier): partecipants_ids.append(members.user.id) except Exception as e: - if e.__str__().__contains__("ChatParticipantsForbidden"): + if "ChatParticipantsForbidden" in str(e): print(f"[{classes.BColor.FAIL}write_all_members_channel_logs_file{classes.BColor.ENDC}] " f"Members can not be retrieved because it's a channel or an old private group. \nIn the latter case, " f"Telegram denies the possibility to get the full list of members;\n it's possible to show only users" f"who wrote at least one message into the chat." + "\n\n") - # Retrieves the folder into which create the chat's media folder - json_config = open("configuration.json", "r") - load_json = json.load(json_config) - export_media = load_json["export_media"] + with open("configuration.json", "r") as json_config: + load_json = json.load(json_config) + export_media = load_json["export_media"] - # Identifies the type of chat, to obtain the channel name in case of channel chats chat_obj = None while chat_obj is None: try: - chat_obj = client_instance.get_chat(chat_identifier) + # Added await + chat_obj = await client_instance.get_chat(chat_identifier) except FloodWait: print(f"{classes.BColor.FAIL}[get_chat_logs_by_identifier] FloodWait exception may be fired by Telegram. " f"Waiting 22s{classes.BColor.ENDC}") - time.sleep(22) # this value is specifically provided by Telegram, - # relating to the particular API calling which caused the exception + time.sleep(22) + chat_title = "" - if chat_obj.type == "channel": + if chat_obj.type.value == "channel": # v2 uses enums, checking value string or accessing property if chat_obj.username is not None: chat_title = chat_obj.username else: chat_title = chat_obj.title + elif str(chat_obj.type) == "ChatType.CHANNEL": # Safety check for enum string + if chat_obj.username is not None: + chat_title = chat_obj.username + else: + chat_title = chat_obj.title while True: try: formatted_log = list() - - # Create a list with ALL messages exchanged with userIdentifier chat = list() - # DEBUG: for message in client_instance.get_history(chat_identifier, limit=3): instead of for message in client.iter_history(chat_identifier): - for message in client_instance.iter_history(chat_identifier): + + # Changed iter_history to get_chat_history (async) + async for message in client_instance.get_chat_history(chat_identifier): chat.append(message) - # Iterate over the previously created list + for msg in chat: - # export media if JSON is 1 if export_media == 1: if msg.media: try: @@ -130,20 +118,27 @@ def get_chat_logs_by_identifier(client_instance, chat_identifier, directory_name create_path = create_directory + _OS_SEP + directory_name + _OS_SEP print( f"[{classes.BColor.OKBLUE}get_chat_logs_by_identifier{classes.BColor.ENDC}] Downloading attached media...") - client_instance.download_media(msg, file_name=create_path) + # Added await for download_media + await client_instance.download_media(msg, file_name=create_path) except ValueError: print( f"[{classes.BColor.FAIL}get_chat_logs_by_identifier{classes.BColor.ENDC}] This media is not downloadable.") except Exception as e: print('Failed to download. Reason: {}'.format(e)) - # Creates the log first column + if msg.from_user is not None: _sender_username = classes.User(msg.from_user).to_string() if msg.from_user.id not in partecipants_ids: partecipants_ids.append(msg.from_user.id) else: _sender_username = chat_title - _formatted_message_date = datetime.utcfromtimestamp(msg.date).strftime(_TIME_FORMAT) + + # Handling date (v2 date is already datetime object) + date_val = msg.date + if isinstance(date_val, int): + _formatted_message_date = datetime.utcfromtimestamp(date_val).strftime(_TIME_FORMAT) + else: + _formatted_message_date = date_val.strftime(_TIME_FORMAT) if msg.text is not None: log_line = _FORMAT_LOG_STRING.format(_sender_username, _formatted_message_date, @@ -242,22 +237,10 @@ def get_chat_logs_by_identifier(client_instance, chat_identifier, directory_name except FloodWait: print(f"{classes.BColor.FAIL}[get_chat_logs_by_identifier] FloodWait exception may be fired by Telegram. " f"Waiting 29s{classes.BColor.ENDC}") - time.sleep(29) # this value is specifically provided by Telegram, - # relating to the particular API calling which caused the exception - - -def get_contact(client_instance, targets=None): - """ - Searches from matching chats with the ids entered by the user - The function distinguishes between “private”, “bot”, “group”, “supergroup” or “channel”. - Args: - client_instance: Pyrogram Client, the main means for interacting with Telegram. - targets: can be: list of full name, username or phone number for each user - Returns: - saved_contact: list of contacts saved into user's phone book - non_contact_chat_dict: list of non-contact: “bot”, “group”, “supergroup” or “channel” - - """ + time.sleep(29) + + +async def get_contact(client_instance, targets=None): if targets is None: targets = [] saved_contact = list() @@ -265,59 +248,53 @@ def get_contact(client_instance, targets=None): non_contact_type_dict = dict() print(f"\n[{classes.BColor.OKBLUE}get_contact{classes.BColor.ENDC}] Retrieving all matching contacts\n") - # iterate over chats for target in targets: - for dialog in client_instance.iter_dialogs(): - # Users and bot are handled in the same way by Telegram - if dialog.chat.type == 'private' or dialog.chat.type == 'bot': - user = client_instance.get_users(dialog.chat.id) - - first_name = '' if user["first_name"] is None else str(user["first_name"]).lower() - last_name = '' if user["last_name"] is None else str(user["last_name"]).lower() - phone_number = '' if user["phone_number"] is None else str(user["phone_number"]).lower() - username = '' if user["username"] is None else str(user["username"]).lower() + # Changed iter_dialogs to get_dialogs (async) + async for dialog in client_instance.get_dialogs(): + # In v2, chat.type is an Enum. Convert to str for comparison + d_type = str(dialog.chat.type).lower() # e.g. "chattype.private" + + # Simple check for types + is_private = "private" in d_type + is_bot = "bot" in d_type + + if is_private or is_bot: + # get_users is async + users_list = await client_instance.get_users([dialog.chat.id]) + user = users_list[0] if isinstance(users_list, list) else users_list + + first_name = '' if user.first_name is None else str(user.first_name).lower() + last_name = '' if user.last_name is None else str(user.last_name).lower() + phone_number = '' if user.phone_number is None else str(user.phone_number).lower() + username = '' if user.username is None else str(user.username).lower() full_name = first_name + " " + last_name is_present = True if target in full_name or target in username or target in phone_number else False - # if user still exists and the user has specified a name to search or if he wants all users - if (not user["is_deleted"]) and ((target != "" and is_present) or (target == "")): + if (not user.is_deleted) and ((target != "" and is_present) or (target == "")): print(f"[{classes.BColor.OKBLUE}get_contact{classes.BColor.ENDC}] " f"Person chat match found{classes.BColor.ENDC}") - # add the dictionary to the resulting variable saved_contact.append(user) - - # in this case, if dialog.chat.type is not private - # else is "group", "supergroup" or "channel" else: title = dialog.chat.title - # for target in targets: - if target in title.lower(): + if title and target in title.lower(): print(f"[{classes.BColor.OKBLUE}get_contact{classes.BColor.ENDC}] " + - dialog.chat.type + + str(dialog.chat.type) + " chat match found") non_contact_chat_dict[dialog.chat.id] = title - non_contact_type_dict[dialog.chat.id] = dialog.chat.type + non_contact_type_dict[dialog.chat.id] = str(dialog.chat.type) return saved_contact, non_contact_chat_dict, non_contact_type_dict -def menu_get_contact(client_instance): - """ - Reads user input for single researches and gives a feedback about the research (chats found or not) - Args: - client_instance: Pyrogram Client, the main means for interacting with Telegram. - Returns: - the id of the chat, distinguished as user chat or non-user chat - """ - target_name = input("You can enter one of the following information: " +async def menu_get_contact(client_instance): + target_name = await async_input("You can enter one of the following information: " "\n- Phone Book name \n- Telegram username \n- Channel name \n- Group name " "\n- Phone number (in this case remember to indicate also the phone prefix): " "\n- Or press enter if you want to see a list of the chats" "\n Please enter your decision: ") - # necessary [target_name.lower()] as list for method get_contact - users, non_user_dict, non_contact_type_dict = get_contact(client_instance, [target_name.lower()]) + users, non_user_dict, non_contact_type_dict = await get_contact(client_instance, [target_name.lower()]) if not users and not bool(non_user_dict): print(f"{classes.BColor.FAIL}No contacts found!{classes.BColor.ENDC}") @@ -351,7 +328,8 @@ def menu_get_contact(client_instance): while select_key != 0: print(f"[{classes.BColor.OKBLUE}menu_get_contact{classes.BColor.ENDC}] Select number please: ") try: - key = int(input()) + key_in = await async_input("") + key = int(key_in) if key < 0 or key >= len(users) + len(non_user_dict): print(f"{classes.BColor.WARNING}[menu_get_contact] Invalid selection.{classes.BColor.ENDC}") else: @@ -359,23 +337,16 @@ def menu_get_contact(client_instance): except ValueError: print(f"{classes.BColor.WARNING}[menu_get_contact] Please, insert a number.{classes.BColor.ENDC}") - # returns the chatId connected to the user/group/channel/etc. if key < len(users): - return users[key].id, client_instance.get_chat(users[key].id).type + chat = await client_instance.get_chat(users[key].id) + return users[key].id, str(chat.type) else: chat_id = list(non_user_dict)[key - len(users)] return chat_id, non_contact_type_dict[chat_id] -def menu_get_multiple_contact(client_instance): - """ - Reads user input (for multiple research) and splits it by ";" - Args: - client_instance: Pyrogram Client, the main means for interacting with Telegram. - Returns: - ids: list with ids of the chats - """ - target_name = str(input("User separator ';' to select multiple name.\n" +async def menu_get_multiple_contact(client_instance): + target_name = str(await async_input("User separator ';' to select multiple name.\n" "Enter your decision: ")) non_user_dict = list() @@ -385,7 +356,7 @@ def menu_get_multiple_contact(client_instance): users_split = [usr.lower() for usr in users_split] users_split = [usr.strip() for usr in users_split] users_split = list(set(users_split)) - users, non_user_dict, non_contact_type_dict = get_contact(client_instance, users_split) + users, non_user_dict, non_contact_type_dict = await get_contact(client_instance, users_split) else: print("Please, use ;") @@ -411,7 +382,8 @@ def menu_get_multiple_contact(client_instance): print(f"[{classes.BColor.OKBLUE}*{classes.BColor.ENDC}] " + str(key) + " " + chat_data_to_log) key += 1 ids.append(user.id) - types_dict[user.id] = client_instance.get_chat(user.id).type + chat = await client_instance.get_chat(user.id) + types_dict[user.id] = str(chat.type) for chat_id in non_user_dict: print(f"[{classes.BColor.OKBLUE}*{classes.BColor.ENDC}] " + str(key) + " " + non_user_dict[chat_id] + " (" + str(non_contact_type_dict[chat_id]) + ")") @@ -422,19 +394,7 @@ def menu_get_multiple_contact(client_instance): return ids, types_dict -def get_multiple_chat_ids_by_dialogs(client_instance, multiple_ids_chats): - """ - Analyze the list of chat ids - Args: - client_instance: Pyrogram Client, the main means for interacting with Telegram. - multiple_ids_chats: list of chats ids to analyze. - Returns: - chat_ids_list: list of all chat ids to analyze - chat_id_usernames_dict: dictionary with chat_id as keys and usernames as values - chat_id_title_dict: dictionary with chat_id as keys and chat title as values - chat_id_full_name_dict: dictionary with chat_id as keys and full name (first name and last name) as values - chat_id_phone_number_dict: dictionary with chat_id as keys and phone number as values - """ +async def get_multiple_chat_ids_by_dialogs(client_instance, multiple_ids_chats): chat_ids_list = list() chat_id_usernames_dict = dict() chat_id_title_dict = dict() @@ -442,18 +402,14 @@ def get_multiple_chat_ids_by_dialogs(client_instance, multiple_ids_chats): chat_id_phone_number_dict = dict() for ids_chats in multiple_ids_chats: - for dialog in client_instance.iter_dialogs(): - # If user hasn't specified a particular user to extract or if he wants to extract a particular chat + async for dialog in client_instance.get_dialogs(): if dialog.chat.id == ids_chats: - # if (single_chat_id is None) or (single_chat_id is not None and dialog.chat.id == sci): if dialog.chat.username is not None: chat_ids_list.append(dialog.chat.id) chat_id_usernames_dict[dialog.chat.id] = dialog.chat.username - # Tries to get the person phone number retrieving his id; - # it's necessary a single-item list for get_users() ids = list() ids.append(dialog.chat.id) - user_obj_list = client_instance.get_users(ids) + user_obj_list = await client_instance.get_users(ids) if user_obj_list and user_obj_list[0].phone_number is not None: chat_id_phone_number_dict[dialog.chat.id] = user_obj_list[0].phone_number @@ -469,35 +425,20 @@ def get_multiple_chat_ids_by_dialogs(client_instance, multiple_ids_chats): if dialog.chat.first_name is not None and dialog.chat.id not in chat_ids_list: if dialog.chat.id not in chat_ids_list: chat_ids_list.append(dialog.chat.id) - # Identify the full name of the person who the chat relates to formatted_name = dialog.chat.first_name if dialog.chat.last_name is not None: formatted_name = formatted_name + " " + dialog.chat.last_name chat_id_full_name_dict[dialog.chat.id] = formatted_name - # Tries to get the person phone number retrieving his id ids = list() ids.append(dialog.chat.id) - user_obj_list = client_instance.get_users(ids) + user_obj_list = await client_instance.get_users(ids) if user_obj_list and user_obj_list[0].phone_number is not None: chat_id_phone_number_dict[dialog.chat.id] = user_obj_list[0].phone_number return chat_ids_list, chat_id_usernames_dict, chat_id_title_dict, chat_id_full_name_dict, chat_id_phone_number_dict -def get_chat_ids_by_dialogs(client_instance, single_chat_id=None): - """ - One specified chat or all chats - Args: - client_instance: Pyrogram Client, the main means for interacting with Telegram. - single_chat_id: if this param is None, all chats are retrieved; otherwise, only one chat is retrieved. - Returns: - chat_ids_list: list of all chat ids to analyze - chat_id_usernames_dict: dictionary with chat_id as keys and usernames as values - chat_id_title_dict: dictionary with chat_id as keys and chat title as values - chat_id_full_name_dict: dictionary with chat_id as keys and full name (first name and last name) as values - deleted_chat_ids: list of deleted chats' ids - chat_id_phone_number_dict: dictionary with chat_id as keys and phone number as values - """ +async def get_chat_ids_by_dialogs(client_instance, single_chat_id=None): chat_ids_list = list() chat_id_usernames_dict = dict() chat_id_title_dict = dict() @@ -506,17 +447,14 @@ def get_chat_ids_by_dialogs(client_instance, single_chat_id=None): deleted_chat_ids = list() chat_type_dict = dict() - for dialog in client_instance.iter_dialogs(): - # If user hasn't specified a particular user to extract or if he wants to extract a particular chat + async for dialog in client_instance.get_dialogs(): if (single_chat_id is None) or (single_chat_id is not None and dialog.chat.id == single_chat_id): if dialog.chat.username is not None: chat_ids_list.append(dialog.chat.id) chat_id_usernames_dict[dialog.chat.id] = dialog.chat.username - # Tries to get the person phone number retrieving his id; - # it's necessary a single-item list for get_users() ids = list() ids.append(dialog.chat.id) - user_obj_list = client_instance.get_users(ids) + user_obj_list = await client_instance.get_users(ids) if user_obj_list and user_obj_list[0].phone_number is not None: chat_id_phone_number_dict[dialog.chat.id] = user_obj_list[0].phone_number @@ -532,15 +470,13 @@ def get_chat_ids_by_dialogs(client_instance, single_chat_id=None): if dialog.chat.first_name is not None: if dialog.chat.id not in chat_ids_list: chat_ids_list.append(dialog.chat.id) - # Identify the full name of the person who the chat relates to formatted_name = dialog.chat.first_name if dialog.chat.last_name is not None: formatted_name = formatted_name + " " + dialog.chat.last_name chat_id_full_name_dict[dialog.chat.id] = formatted_name - # Tries to get the person phone number retrieving his id ids = list() ids.append(dialog.chat.id) - user_obj_list = client_instance.get_users(ids) + user_obj_list = await client_instance.get_users(ids) if user_obj_list and user_obj_list[0].phone_number is not None: chat_id_phone_number_dict[dialog.chat.id] = user_obj_list[0].phone_number @@ -549,29 +485,15 @@ def get_chat_ids_by_dialogs(client_instance, single_chat_id=None): "it means the other user deleted his account".format(dialog.chat.id)) deleted_chat_ids.append(dialog.chat.id) - chat_type_dict[dialog.chat.id] = dialog.chat.type + chat_type_dict[dialog.chat.id] = str(dialog.chat.type) return chat_ids_list, chat_id_usernames_dict, chat_id_title_dict, \ chat_id_full_name_dict, deleted_chat_ids, chat_id_phone_number_dict, chat_type_dict -def write_all_chats_logs_file(client_instance, chat_ids_list, chat_id_usernames_dict, chat_id_title_dict, +async def write_all_chats_logs_file(client_instance, chat_ids_list, chat_id_usernames_dict, chat_id_title_dict, chat_id_full_name_dict, deleted_chat_ids, chat_id_phone_number_dict, chat_type): - """ - Writes the chat logs for all chats (also deleted chats) - Args: - client_instance: Pyrogram Client, the main means for interacting with Telegram. - single_chat_id: if this param is None, all chats are retrieved; otherwise, only one chat is retrieved. - Returns: - chat_ids_list: list of all chat ids to analyze - chat_id_usernames_dict: dictionary with chat_id as keys and usernames as values - chat_id_title_dict: dictionary with chat_id as keys and chat title as values - chat_id_full_name_dict: dictionary with chat_id as keys and full name (first name and last name) as values - deleted_chat_ids: list of deleted chats' ids - chat_id_phone_number_dict: dictionary with chat_id as keys and phone number as values - """ header_string = _ALL_CHATS_HEADER_STRING - # Create logs file for every contact on the phone for chat_id in chat_ids_list: chat_data_to_log = "" if chat_id in chat_id_usernames_dict: @@ -583,7 +505,6 @@ def write_all_chats_logs_file(client_instance, chat_ids_list, chat_id_usernames_ if chat_id in chat_id_title_dict: chat_data_to_log = chat_data_to_log + "{};".format(chat_id_title_dict[chat_id]) - # creating file name file_name_prefix = "" if chat_id in chat_id_usernames_dict: file_name_prefix = file_name_prefix + "{}_".format(chat_id_usernames_dict[chat_id]) @@ -597,24 +518,22 @@ def write_all_chats_logs_file(client_instance, chat_ids_list, chat_id_usernames_ file_name_prefix = file_name_prefix + chat_type else: file_name_prefix = file_name_prefix + chat_type[chat_id] - # Removing illegal characters from file name name file_name_prefix = (file_name_prefix.replace("\\", "_")).replace("/", "_") - # Creates the directory where to store medias directory_name = file_name_prefix file_name = file_name_prefix + ".csv" file_name = _CHAT_PATH + _OS_SEP + file_name - - # Logs about existing chats print(f"[{classes.BColor.OKBLUE}write_all_chats_logs_file{classes.BColor.ENDC}]" + " Processing chat with {}".format(chat_data_to_log)) - log_lines, partecipants_ids = get_chat_logs_by_identifier(client_instance, chat_id, directory_name) - with open(file_name, 'w', encoding='utf-16') as file: # encoding necessary to correctly represent emojis + + # Async call + log_lines, partecipants_ids = await get_chat_logs_by_identifier(client_instance, chat_id, directory_name) + + with open(file_name, 'w', encoding='utf-16') as file: file.write(header_string) for msgLog in log_lines: file.write("\n" + msgLog) - # Partecipants file if partecipants_ids: print(f"[{classes.BColor.OKBLUE}write_all_chats_logs_file{classes.BColor.ENDC}] " f"Processing members chats \n\n") @@ -629,7 +548,13 @@ def write_all_chats_logs_file(client_instance, chat_ids_list, chat_id_usernames_ with open(saving_file_path, "w", encoding="UTF-16") as file: file.write(header + "\n") - for user in client_instance.get_users(partecipants_ids): + # get_users is async + users_list = await client_instance.get_users(partecipants_ids) + # It might return a single user if list len is 1, ensure iterable + if not isinstance(users_list, list): + users_list = [users_list] + + for user in users_list: file.write(classes.User(user).to_string() + "\n" ) else: print(f"[{classes.BColor.FAIL}write_all_members_channel_logs_file{classes.BColor.ENDC}] " @@ -637,9 +562,7 @@ def write_all_chats_logs_file(client_instance, chat_ids_list, chat_id_usernames_ f"Telegram denies the possibility to get the full list of members;\n it's possible to show only users" f"who wrote at least one message into the chat." + "\n\n") - # if there are deleted chats if len(deleted_chat_ids) != 0: - # Logs about deleted chats print(f"[{classes.BColor.OKBLUE}write_all_chats_logs_file{classes.BColor.ENDC}] Processing deleted chats \n\n") for chat_id in deleted_chat_ids: header_string = _ALL_CHATS_HEADER_STRING @@ -649,26 +572,23 @@ def write_all_chats_logs_file(client_instance, chat_ids_list, chat_id_usernames_ print(f"[{classes.BColor.OKBLUE}write_all_chats_logs_file{classes.BColor.ENDC}] Processing " + str(chat_id) + " deleted chat") - log_lines, partecipants_ids = get_chat_logs_by_identifier(client_instance, chat_id, directory_name) - with open(file_name, 'w', encoding='utf-16') as file: # encoding necessary to correctly represent emojis + + # Async call + log_lines, partecipants_ids = await get_chat_logs_by_identifier(client_instance, chat_id, directory_name) + + with open(file_name, 'w', encoding='utf-16') as file: file.write(header_string) for msgLog in log_lines: file.write("\n" + msgLog) -def write_group_chats_members(client_instance, chat_title_list): - """ - Writes the log file with the partecipants of a chat. - Log is in format: FirstName_LastName_ID or Username_ID or FirstName_ID or FirstName_LastName_ID - Args: - client_instance: client instance - chat_title_list: the dictionary contained id and title for channel - """ +async def write_group_chats_members(client_instance, chat_title_list): for chat_id in chat_title_list: title = chat_title_list[chat_id] list_username = list() try: - for member in client_instance.get_chat_members(chat_id): + # get_chat_members is async + async for member in client_instance.get_chat_members(chat_id): list_username.append(classes.User(member.user).to_string()) except AttributeError: print(f"[{classes.BColor.FAIL}write_all_members_channel_logs_file{classes.BColor.ENDC}] " @@ -682,7 +602,6 @@ def write_group_chats_members(client_instance, chat_title_list): f"Processing members chats \n\n") header = "MEMBERS" - # Removing illegal characters from file name name file_name = (title.replace("\\", "_")).replace("/", "_") name_file = file_name + ".csv" directory = _MEMBERS_PATH @@ -702,39 +621,31 @@ def write_group_chats_members(client_instance, chat_title_list): def clean_extraction_folder(): - """ - Cleans the entire extraction folder, deleting all previous extractions - """ folder = "extraction" print(f"[{classes.BColor.OKBLUE}clean_extraction_folder{classes.BColor.ENDC}] " f"Removing files from folder " + folder) - for filename in os.listdir(folder): - file_path = os.path.join(folder, filename) - try: - if os.path.isfile(file_path) or os.path.islink(file_path): - os.unlink(file_path) - elif os.path.isdir(file_path): - shutil.rmtree(file_path) - except Exception as e: - print('Failed to delete %s. Reason: %s' % (file_path, e)) + if os.path.exists(folder): + for filename in os.listdir(folder): + file_path = os.path.join(folder, filename) + try: + if os.path.isfile(file_path) or os.path.islink(file_path): + os.unlink(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) + except Exception as e: + print('Failed to delete %s. Reason: %s' % (file_path, e)) print(f"[{classes.BColor.OKBLUE}clean_extraction_folder{classes.BColor.ENDC}] Folder cleaned successfully\n") def create_extraction_folders(): - """ - Creates the extraction folders used to save extracted chats, media and members - """ print(f"[{classes.BColor.OKBLUE}create_extraction_folders{classes.BColor.ENDC}] Creating extraction folders") - # creating chat path if not os.path.exists(_CHAT_PATH): os.makedirs(_CHAT_PATH) - # creating members path if not os.path.exists(_MEMBERS_PATH): os.makedirs(_MEMBERS_PATH) - # creating media path if not os.path.exists(_MEDIA_PATH): os.makedirs(_MEDIA_PATH) @@ -743,11 +654,6 @@ def create_extraction_folders(): def compress_and_hash_extraction(): - """ - Creates a zip archive with the content of the current extraction - and a txt file wish the hashes of the archive in MD5 and SHA512 - """ - print( f"[{classes.BColor.OKBLUE}compress_and_hash_extraction{classes.BColor.ENDC}] Creating extraction zip archive...") try: @@ -776,7 +682,6 @@ def compress_and_hash_extraction(): md5_hash = hashlib.md5() with open(_EXTRACTION_ZIP, "rb") as f: - # Read and update hash string value in blocks of 4K for byte_block in iter(lambda: f.read(4096), b""): sha512_hash.update(byte_block) md5_hash.update(byte_block) @@ -795,65 +700,73 @@ def compress_and_hash_extraction(): def show_banner(): - print(" _______ _ ______ _ \n" - "|__ __| | | | ____| | | \n" - " | | ___| | ___ __ _ _ __ __ _ _ __ ___ | |__ __ ___ __ ___ _ __| |_ ___ _ __ \n" - " | |/ _ \ |/ _ \/ _` | '__/ _` | '_ ` _ \ | __| \ \/ / '_ \ / _ \| '__| __/ _ \ '__|\n" - " | | __/ | __/ (_| | | | (_| | | | | | | | |____ > <| |_) | (_) | | | || __/ | \n" - " |_|\___|_|\___|\__, |_| \__,_|_| |_| |_| |______/_/\_\ .__/ \___/|_| \__\___|_| \n" - " __/ | | | \n" - " |___/ |_| -By DMD \n" + print(r" _______ _ ______ _ " + "\n" + r"|__ __| | | | ____| | | " + "\n" + r" | | ___| | ___ __ _ _ __ __ _ _ __ ___ | |__ __ ___ __ ___ _ __| |_ ___ _ __ " + "\n" + r" | |/ _ \ |/ _ \/ _` | '__/ _` | '_ ` _ \ | __| \ \/ / '_ \ / _ \| '__| __/ _ \ '__|" + "\n" + r" | | __/ | __/ (_| | | | (_| | | | | | | | |____ > <| |_) | (_) | | | || __/ | " + "\n" + r" |_|\___|_|\___|\__, |_| \__,_|_| |_| |_| |______/_/\_\ .__/ \___/|_| \__\___|_| " + "\n" + r" __/ | | | " + "\n" + r" |___/ |_| -By DMD " + "\n" ) - -if __name__ == "__main__": +# Main async logic +async def main(): show_banner() - response = -1 - # Create an instance of the pyrogram client + + config = configparser.ConfigParser() + config.read("config.ini") + + try: + my_api_id = config.get("pyrogram", "api_id").strip() + my_api_hash = config.get("pyrogram", "api_hash").strip() + except Exception as e: + print(f"{classes.BColor.FAIL}Error reading config.ini. Make sure it exists and has [pyrogram] section with api_id and api_hash.{classes.BColor.ENDC}") + return if os.path.exists("extraction"): - clean_folder = input("Do you want to clean extraction folder from previous extractions files? (y/N): ") + clean_folder = await async_input("Do you want to clean extraction folder from previous extractions files? (y/N): ") if clean_folder == 'y': clean_extraction_folder() + response = -1 while response != 0: - update_folders() - - with Client("my_account", hide_password=True) as client: - + + # Keep client open for the whole session loop + async with Client("my_account", api_id=my_api_id, api_hash=my_api_hash, hide_password=True) as client: try: - type_of_extraction = int(input("\nEnter: \n[1] to extract the chats for a single user " + # Use async_input to prevent loop blocking + choice_str = await async_input("\nEnter: \n[1] to extract the chats for a single user " " \n[2] to extract the chats for multiple users" " \n[3] to extract all chats" " \n[-1] to quit" - " \nPlease enter your choice: ")) + " \nPlease enter your choice: ") + type_of_extraction = int(choice_str) if type_of_extraction == 1: create_extraction_folders() - # Get chat logs for a user-specified chat - chatId, chat_type = menu_get_contact(client) - chatIdsList, chatIdUsernamesDict, chatIdTitleDict, chatIdFullNameDict, deletedChatIds, chatIdPhoneNumberDict, chat_type_list = get_chat_ids_by_dialogs( + chatId, chat_type = await menu_get_contact(client) + chatIdsList, chatIdUsernamesDict, chatIdTitleDict, chatIdFullNameDict, deletedChatIds, chatIdPhoneNumberDict, chat_type_list = await get_chat_ids_by_dialogs( client, chatId) - write_all_chats_logs_file(client, chatIdsList, chatIdUsernamesDict, chatIdTitleDict, + await write_all_chats_logs_file(client, chatIdsList, chatIdUsernamesDict, chatIdTitleDict, chatIdFullNameDict, deletedChatIds, chatIdPhoneNumberDict, chat_type) compress_and_hash_extraction() elif type_of_extraction == 2: create_extraction_folders() - chatIds, chat_types = menu_get_multiple_contact(client) + chatIds, chat_types = await menu_get_multiple_contact(client) chatIdsList, chatIdUsernamesDict, chatIdTitleDict, chatIdFullNameDict, chatIdPhoneNumberDict = \ - get_multiple_chat_ids_by_dialogs(client, chatIds) - write_all_chats_logs_file(client, chatIdsList, chatIdUsernamesDict, chatIdTitleDict, + await get_multiple_chat_ids_by_dialogs(client, chatIds) + await write_all_chats_logs_file(client, chatIdsList, chatIdUsernamesDict, chatIdTitleDict, chatIdFullNameDict, [], chatIdPhoneNumberDict, chat_types) compress_and_hash_extraction() elif type_of_extraction == 3: create_extraction_folders() - # Get chat logs for all chats chatIdsList, chatIdUsernamesDict, chatIdTitleDict, chatIdFullNameDict, deletedChatIds, \ - chatIdPhoneNumberDict, chat_type_dict = get_chat_ids_by_dialogs(client) - write_all_chats_logs_file(client, chatIdsList, chatIdUsernamesDict, chatIdTitleDict, chatIdFullNameDict, + chatIdPhoneNumberDict, chat_type_dict = await get_chat_ids_by_dialogs(client) + await write_all_chats_logs_file(client, chatIdsList, chatIdUsernamesDict, chatIdTitleDict, chatIdFullNameDict, deletedChatIds, chatIdPhoneNumberDict, chat_type_dict) compress_and_hash_extraction() @@ -864,6 +777,11 @@ def show_banner(): except ValueError: print("Please select a correct number.") except Exception as e: - if not e.__str__().__contains__("No contacts found"): - print(e.__str__()) + if not "No contacts found" in str(e): + print(str(e)) + # Break loop on critical error to prevent infinite restart loop + if "closed database" in str(e): + response = 0 +if __name__ == "__main__": + asyncio.run(main())