diff --git a/.gitignore b/.gitignore
index b6e4761..29a23a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,11 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# PyCharm / JetBrains IDEs
+/.idea
+
+# pa_to_ap specific files
+/podcast_addict_extracted
+/*.db
+/*.backup
diff --git a/README.md b/README.md
index c1b690f..b7a391a 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,17 @@ Migrate data from Podcast Addict's to AntennaPod's database
 
 This does not use any IDs for matching feeds and episodes from one db to another, as those tend to be very unreliable. (They're supposed to stay the same, but often they don't.) Instead, we match them by their name and, in some cases, other attributes. This will work even if the name changed. For example, when using the script one episode's name changed from something like `123. Great Title` to just `Great Title`, but they were still matched.
 
+## Configuration
+There are a few optional configuration settings you can change by editing the variables at the top of `pa_to_ap.py` before running the script.
+
+* `TRANSFER_DOWNLOADED_EPISODES` controls whether existing downloads in Podcast Addict are copied to AntennaPod.
+  Additional steps are required; see the steps below.
+  * Default: `True` (downloads are transferred.)
+* `EPISODES_DIR_PATH` controls the directory path for transferred episodes (to which you have to manually copy/move the files).
+  * Default: `/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict`
+* `MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE` controls whether, if no name match is found for an episode, we try to match on the episode media URL instead.
+  * Default: `True` (URL match is used as a fallback.)
+
 ## Steps
 
 0. Install Python 3.8 or later
@@ -20,18 +31,17 @@ This does not use any IDs for matching feeds and episodes from one db to another
 5. Run the [`pa_to_ap.py`](pa_to_ap.py) script (AntennaPod db file will be **modified**!) in a terminal
 6. Confirm that matches are correct (if they aren't you may need to increase `min_similarity`)
 7. Copy the modified db file back to your phone
-8. Create `/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict`
-9. Manually move (or copy) the folders **inside**
-`/storage/emulated/0/Android/data/com.bambuna.podcastaddict/files/podcast/`
-**to**
-`/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict`
- as AntennaPod cannot access the files under the other app's directory (Consider making a backup of these files.)
-10. Import the modified db in AntennaPod
+8. If `TRANSFER_DOWNLOADED_EPISODES` is enabled (it is by default):
+   1. Create `/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict`
+   2. Manually move (or copy) the folders **inside**
+      `/storage/emulated/0/Android/data/com.bambuna.podcastaddict/files/podcast/`
+      **to**
+      `/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict`
+      as AntennaPod cannot access the files under the other app's directory (Consider making a backup of these files.)
+9. Import the modified db in AntennaPod
 
 Enjoy!
 
-Of course, you can change the location (to which you have to manually copy/move the files to) by modifying the `EPISODES_DIR_PATH` before running the script.
-
 ## Warning
 
 Note that this is somewhat rough and will likely not handle a lot of edge cases.
diff --git a/pa_to_ap.py b/pa_to_ap.py
index 6ed0914..03da3f3 100644
--- a/pa_to_ap.py
+++ b/pa_to_ap.py
@@ -3,7 +3,8 @@
 import sys
 import zipfile
 import sqlite3
-from dataclasses import dataclass
+import functools
+from dataclasses import dataclass, field
 from operator import itemgetter
 from pathlib import Path
 from sqlite3 import Cursor
@@ -14,9 +15,11 @@ CUR_PATH = Path()
 
 
 
+TRANSFER_DOWNLOADED_EPISODES = True
 EPISODES_DIR_PATH = '/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict'
 MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE = True
+AP_TAG_SEPARATOR = "\u001e"  # Separator character for AP tag column blob
 
 @dataclass
 class Feed:
@@ -25,8 +28,32 @@ class Feed:
     description: str
     author: str
     keep_updated: int
-    folder_name: str = ''
-
+    feed_url: str
+
+@dataclass
+class PAFeed(Feed):
+    tag: int  # Tag for single row from JOIN
+    tags: list[int] = field(default_factory=list, init=False)  # For merged PAFeed rows this will contain all tags
+    folder_name: str
+
+    def tag_names(self, pa_tags: dict[int, str]):
+        return [pa_tags[x] for x in self.tags]
+
+@dataclass
+class APFeed(Feed):
+    _tags: str
+
+    @property
+    def tags_str(self):
+        return self._tags
+
+    @property
+    def tags(self):
+        return self._tags.split(AP_TAG_SEPARATOR) if self._tags is not None else list()
+
+    @tags.setter
+    def tags(self, value: list[str]):
+        self._tags = AP_TAG_SEPARATOR.join(value)
 
 def error(msg):
     print("ERROR:", msg)
@@ -73,46 +100,49 @@ def get_antenna_pod_and_podcast_addict_backup_path():
 
 
 def transfer(podcast_addict_cur: Cursor, antenna_pod_cur: Cursor):
-    # first find match for all feeds in pa
-    pa_feeds = [Feed(*a) for a in podcast_addict_cur.execute(
-        'select _id, name, description, author, '
-        'automaticRefresh, folderName from podcasts '
-        'where subscribed_status = 1 and is_virtual = 0')]
+    # first find match for all feeds in pa, left join on tags relation table (so there may be multiple rows for each podcast)
+    pa_feeds_one_to_many_tags = [PAFeed(*a) for a in podcast_addict_cur.execute(
+        'SELECT podcasts._id, podcasts.name, description, author, '
+        'automaticRefresh, feed_url, tag_relation.tag_id, folderName FROM podcasts '
+        'LEFT JOIN tag_relation ON tag_relation.podcast_id = podcasts._id '
+        'WHERE subscribed_status = 1 AND is_virtual = 0 AND initialized_status = 1')]
+
+    # Collate multiple JOIN rows for each podcast if they had multiple tags
+    def reduce_by_tag(feeds: dict[int, PAFeed], current_feed: PAFeed):
+        if current_feed.id not in feeds:
+            if current_feed.tag is not None:
+                current_feed.tags.append(current_feed.tag)
+            feeds[current_feed.id] = current_feed
+        elif current_feed.tag is not None:
+            existing_feed: PAFeed = feeds[current_feed.id]
+            existing_feed.tags.append(current_feed.tag)
+        return feeds
+
+    pa_feeds_dict = functools.reduce(reduce_by_tag, pa_feeds_one_to_many_tags, dict())
+    pa_feeds = pa_feeds_dict.values()
+
+    pa_tags: dict[int, str] = dict(podcast_addict_cur.execute('SELECT _id, name FROM tags'))
 
     print("# Podcast addict feeds:")
     for feed in pa_feeds:
         print(feed.name)
-    print()
-    print()
-
-    ap_feeds = [Feed(*a) for a in antenna_pod_cur.execute(
-        'select id, title, description, author, keep_updated from Feeds '
-        'where downloaded = 1')]
+    print("\n\n")
 
 
-    feed_attr_to_weight = {  #
-        (lambda f: f.name): 0.85,  #
-        (lambda f: f.author): 0.15,  #
-    }
-    matcher = ObjectListMatcher(feed_attr_to_weight)
-
-    # should never be larger than the largest weight (otherwise is
-    # slightly unpredictable, as not every weight will be evaluated)
-    # value in range [0, 1]
-    matcher.minimum_similarity = 0.78
+    ap_feeds = {a[5]: APFeed(*a) for a in antenna_pod_cur.execute(
+        'select id, title, description, author, keep_updated, download_url, tags from Feeds '
+        )}
 
 
     pa_to_ap = []
-    ap_indices = matcher.get_indices(pa_feeds, ap_feeds)
     for n, pa in enumerate(pa_feeds):
-        ap_idx = ap_indices[n]
-        ap_name = '!!! NO MATCH !!!'
-        if ap_idx >= 0:
-            ap = ap_feeds[ap_idx]
+        pa_name, ap_name = (pa.name or pa.feed_url), '!!! NO MATCH !!!'
+        if pa.feed_url in ap_feeds:
+            ap = ap_feeds[pa.feed_url]
             ap_name = ap.name
             pa_to_ap.append((pa, ap))
 
-        print(pa.name, ap_name, sep=" -> ")
+        print(pa_name, ap_name, sep=" -> ")
 
     print()
     if not confirmed("Is this correct? Can we continue?"):
@@ -125,13 +155,13 @@ def transfer(podcast_addict_cur: Cursor, antenna_pod_cur: Cursor):
     #     # FIXME: make it work if premium and non-premium share same name
     #     if ap.name == "Name of same podcast but premium version":
     #         transfer_from_feed_to_feed(podcast_addict_cur,
-    #                                    antenna_pod_cur, pa, ap)
+    #                                    antenna_pod_cur, pa, ap, pa_tags)
     #         break
     # break
 
 
     for pa, ap in pa_to_ap:
-        transfer_from_feed_to_feed(podcast_addict_cur, antenna_pod_cur, pa, ap)
+        transfer_from_feed_to_feed(podcast_addict_cur, antenna_pod_cur, pa, ap, pa_tags)
         print()
         # break
 
@@ -142,8 +172,9 @@ def transfer(podcast_addict_cur: Cursor, antenna_pod_cur: Cursor):
 
 def transfer_from_feed_to_feed(podcast_addict_cur: Cursor,  #
                                antenna_pod_cur: Cursor,  #
-                               pa: Feed,  #
-                               ap: Feed):
+                               pa: PAFeed,  #
+                               ap: APFeed,
+                               pa_tags: dict[int, str]):
     print(f'# Feed: {ap.name}')
     antenna_pod_cur.execute("UPDATE Feeds "
                             "SET keep_updated = ? "
@@ -153,10 +184,10 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor,  #
     pa_episodes = list(podcast_addict_cur.execute(  #
         #       0     1     n2           n3        n4
         'select _id, name, seen_status, favorite, local_file_name, '
-        #       n5            n6           n7                  n8
-        'playbackDate, duration_ms, chapters_extracted, download_url '
+        #       n5            n6           n7                  n8            n9
+        'playbackDate, duration_ms, chapters_extracted, download_url, position_to_resume '
         'from episodes where podcast_id = ? and '
-        '(seen_status = 1 or '
+        '(seen_status = 1 or position_to_resume > 0 or '
         '(local_file_name != "" and local_file_name IS NOT NULL))', (pa.id,)))
 
     ap_episodes = list(antenna_pod_cur.execute(
@@ -164,16 +195,21 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor,  #
         'select fi.id, fi.title, fm.download_url '
         'from FeedItems fi '
         'LEFT JOIN FeedMedia fm ON fi.id = fm.feeditem '
-        'where fi.feed = ? and fi.read = 0 ', (ap.id,)))
+        'where fi.feed = ? and fi.read = 0 '
+        , (ap.id,)))
 
-    print()
     combinations = len(pa_episodes) * len(ap_episodes)
-    print(f"Rough estimate: {combinations / 4000:.2f} seconds")
-    print()
-    print()
+    print(f"\nRough estimate: {combinations / 4000:.2f} seconds\n\n")
 
     pa_indices = ITEM_MATCHER.get_indices(ap_episodes, pa_episodes)
     seen_match_count = 0
+    # Transfer tags, merge any existing tags with PA tags
+    ap.tags = list(set(ap.tags).union(pa.tag_names(pa_tags)))
+    antenna_pod_cur.execute("UPDATE Feeds "
+                            "SET tags = ? "
+                            "WHERE id = ?",  #
+                            (ap.tags_str, ap.id,))
+
     for ap_ep, pa_idx in zip(ap_episodes, pa_indices):
 
         if pa_idx < 0:
@@ -184,13 +220,15 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor,  #
             if MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE and ap_url is not None:
                 ap_url = ap_url.strip()
                 if len(ap_url) > 9:
-                    for pa_idx, pa_ep in enumerate(pa_episodes):
+                    for pa_idx_urlmatch, pa_ep in enumerate(pa_episodes):
                         if not pa_ep[8]:
                             continue
 
                         pa_url = pa_ep[8].strip()
                         if pa_url and pa_url == ap_url:
+                            print(f"! Fallback to URL match for: {ap_ep[1]}")
                             found = True
+                            pa_idx = pa_idx_urlmatch
                             break
 
             if not found:
@@ -202,13 +240,17 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor,  #
         if pa_ep[2]:
             transfer_from_seen_ep_to_ep(antenna_pod_cur, podcast_addict_cur,  #
                                         pa_ep, ap_ep)
+        else:
+            transfer_progress_ep_to_ep(antenna_pod_cur, podcast_addict_cur,  #
+                                       pa_ep, ap_ep)
+
 
         if pa_ep[3]:
             antenna_pod_cur.execute(
                 "INSERT INTO Favorites (feeditem, feed) VALUES "
                 "(?, ?)", (ap_ep[0], ap.id))
 
-        if pa_ep[4]:
+        if pa_ep[4] and TRANSFER_DOWNLOADED_EPISODES:
             transfer_from_dld_ep_to_ep(antenna_pod_cur, podcast_addict_cur,  #
                                        pa_ep, ap_ep, pa.folder_name)
 
@@ -226,10 +268,9 @@ def transfer_chapters(antenna_pod_cur: Cursor,  #
     for title, start in podcast_addict_cur.execute(  #
             "select name, start from chapters "
             "where podcastId = ? and episodeId = ?", (pa_feed_id, pa_ep[0])):
-        # we use chapter type 2 (id3) simply because it seems most likely
         antenna_pod_cur.execute("INSERT INTO SimpleChapters "
-                                "(title, start, feeditem, type) VALUES "
-                                "(?, ?, ?, 2)", (title, start, ap_ep[0],))
+                                "(title, start, feeditem) VALUES "
+                                "(?, ?, ?)", (title, start, ap_ep[0]))
 
 
 def transfer_from_dld_ep_to_ep(antenna_pod_cur: Cursor,  #
@@ -237,7 +278,7 @@ def transfer_from_dld_ep_to_ep(antenna_pod_cur: Cursor,  #
                                podcast_addict_cur: Cursor,  #
                                pa_ep: tuple,  #
                                ap_ep: tuple,  #
                                pa_folder_name: str):
-    pa_ep_id, _, _, _, pa_local_file_name, _, _, _, _ = pa_ep
+    pa_ep_id, _, _, _, pa_local_file_name, _, _, _, _, _ = pa_ep
     dir_path = EPISODES_DIR_PATH.rstrip("/") + "/" + pa_folder_name
     file_path = dir_path + "/" + pa_local_file_name
@@ -252,8 +293,8 @@ def transfer_from_seen_ep_to_ep(antenna_pod_cur: Cursor,  #
                                 podcast_addict_cur: Cursor,  #
                                 pa_ep: tuple,  #
                                 ap_ep: tuple):
-    print(ap_ep[1], " <> ", pa_ep[1])
-    pa_ep_id, _, _, _, _, pa_playbackDate, pa_duration_ms, _, _ = pa_ep
+    print(ap_ep[1], " <> ", pa_ep[1])
+    pa_ep_id, _, _, _, _, pa_playbackDate, pa_duration_ms, _, _, _ = pa_ep
 
     antenna_pod_cur.execute("UPDATE FeedItems SET read = 1 WHERE id = ?",
                             (ap_ep[0],))
@@ -265,13 +306,26 @@ def transfer_from_seen_ep_to_ep(antenna_pod_cur: Cursor,  #
                             (pa_playbackDate, pa_playbackDate, pa_duration_ms,
                              ap_ep[0],))
 
+def transfer_progress_ep_to_ep(antenna_pod_cur: Cursor,
+                               podcast_addict_cur: Cursor,
+                               pa_ep: tuple,
+                               ap_ep: tuple):
+    print(ap_ep[1], " <> ", pa_ep[1])
+    pa_ep_id, _, _, _, _, pa_playbackDate, pa_duration_ms, _, _, pa_position = pa_ep
+
+    antenna_pod_cur.execute("UPDATE FeedMedia "
+                            "SET last_played_time = ?, "
+                            "position = ?, "
+                            "played_duration = ? "
+                            "WHERE feeditem = ?",
+                            (pa_playbackDate, pa_position, pa_position,
+                             ap_ep[0],))
+
 
 ap_db, pa_db = get_antenna_pod_and_podcast_addict_backup_path()
-print()
-print("AntennaPod .db file found:", ap_db)
+print("\nAntennaPod .db file found:", ap_db)
 print("Podcast Addict .db file found:", pa_db)
-print()
-print()
+print("\n")
 
 podcast_addict_con = None
 antenna_pod_con = None
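
Note (not part of the patch): the tag handling introduced above can be hard to follow from the diff alone, so here is a minimal standalone sketch of the idea — the LEFT JOIN yields one row per podcast/tag pair, the rows are folded into one tag list per podcast with `functools.reduce`, and the tag names are then joined with the `"\u001e"` separator the patch writes into AntennaPod's `Feeds.tags` column. The sample `rows` and `tag_names` values below are made up for illustration only.

```python
import functools

AP_TAG_SEPARATOR = "\u001e"  # same separator the patch uses for the Feeds.tags blob

# Hypothetical rows as produced by the LEFT JOIN: (podcast_id, tag_id); tag_id may be None
rows = [(1, 10), (1, 11), (2, None), (3, 10)]
# Hypothetical contents of Podcast Addict's tags table: _id -> name
tag_names = {10: "News", 11: "Tech"}


def merge(acc: dict, row: tuple) -> dict:
    """Fold one JOIN row into a mapping of podcast_id -> [tag_id, ...]."""
    podcast_id, tag_id = row
    tags = acc.setdefault(podcast_id, [])
    if tag_id is not None:
        tags.append(tag_id)
    return acc


per_feed_tag_ids = functools.reduce(merge, rows, {})

for podcast_id, tag_ids in per_feed_tag_ids.items():
    # In the patch, any tags already present on the matched AntennaPod feed are unioned in here.
    blob = AP_TAG_SEPARATOR.join(tag_names[t] for t in tag_ids)
    print(podcast_id, repr(blob))
```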