diff --git a/.env.sample b/.env.sample index b90d0e1..c9d3931 100644 --- a/.env.sample +++ b/.env.sample @@ -24,6 +24,16 @@ MARK_FILE = "mark.log" ## Timeout for requests for jellyfin REQUEST_TIMEOUT = 300 +## Generate guids +## Generating guids is a slow process, so this is a way to speed up the process +## by using the location only, useful when using same files on multiple servers +GENERATE_GUIDS = "True" + +## Generate locations +## Generating locations is a slow process, so this is a way to speed up the process +## by using the guid only, useful when using different files on multiple servers +GENERATE_LOCATIONS = "True" + ## Max threads for processing MAX_THREADS = 32 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0eb1480..2cfc96e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,9 +60,19 @@ jobs: - name: "Run tests" run: | - # Move test/.env to root - mv test/ci.env .env - # Run script + # Test ci1 + mv test/ci1.env .env + python main.py + + # Test ci2 + mv test/ci2.env .env + python main.py + + # Test ci3 + mv test/ci3.env .env + python main.py + + # Test again to test if it can handle existing data python main.py cat mark.log diff --git a/src/functions.py b/src/functions.py index 2d3d306..3da2b2f 100644 --- a/src/functions.py +++ b/src/functions.py @@ -93,11 +93,18 @@ def search_mapping(dictionary: dict, key_value: str): return None -def future_thread_executor(args: list, threads: int = 32): +def future_thread_executor( + args: list, threads: int = None, override_threads: bool = False +): futures_list = [] results = [] - workers = min(int(os.getenv("MAX_THREADS", 32)), os.cpu_count() * 2, threads) + workers = min(int(os.getenv("MAX_THREADS", 32)), os.cpu_count() * 2) + if threads: + workers = min(threads, workers) + + if override_threads: + workers = threads # If only one worker, run in main thread to avoid overhead if workers == 1: diff --git a/src/plex.py b/src/plex.py index 631c8a7..02616c3 100644 --- 
a/src/plex.py +++ b/src/plex.py @@ -1,14 +1,13 @@ -import re, requests, os, traceback +import os, requests, traceback +from dotenv import load_dotenv from typing import Dict, Union, FrozenSet -import operator -from itertools import groupby as itertools_groupby from urllib3.poolmanager import PoolManager from math import floor from requests.adapters import HTTPAdapter as RequestsHTTPAdapter -from plexapi.video import Episode, Movie +from plexapi.video import Show, Episode, Movie from plexapi.server import PlexServer from plexapi.myplex import MyPlexAccount @@ -18,6 +17,7 @@ from src.functions import ( future_thread_executor, contains_nested, log_marked, + str_to_bool, ) from src.library import ( check_skip_logic, @@ -25,6 +25,12 @@ from src.library import ( ) +load_dotenv(override=True) + +generate_guids = str_to_bool(os.getenv("GENERATE_GUIDS", "True")) +generate_locations = str_to_bool(os.getenv("GENERATE_LOCATIONS", "True")) + + # Bypass hostname validation for ssl. Taken from https://github.com/pkkid/python-plexapi/issues/143#issuecomment-775485186 class HostNameIgnoringAdapter(RequestsHTTPAdapter): def init_poolmanager(self, connections, maxsize, block=..., **pool_kwargs): @@ -37,7 +43,11 @@ class HostNameIgnoringAdapter(RequestsHTTPAdapter): ) -def extract_guids_from_item(item: Union[Movie, Episode]) -> Dict[str, str]: +def extract_guids_from_item(item: Union[Movie, Show, Episode]) -> Dict[str, str]: + # If GENERATE_GUIDS is set to False, then return an empty dict + if not generate_guids: + return {} + guids: Dict[str, str] = dict( guid.id.split("://") for guid in item.guids @@ -46,7 +56,7 @@ def extract_guids_from_item(item: Union[Movie, Episode]) -> Dict[str, str]: if len(guids) == 0: logger( - f"Plex: Failed to get any guids for {item.title}, Using location only", + f"Plex: Failed to get any guids for {item.title}", 1, ) @@ -56,7 +66,9 @@ def extract_guids_from_item(item: Union[Movie, Episode]) -> Dict[str, str]: def get_guids(item: Union[Movie, Episode], 
completed=True): return { "title": item.title, - "locations": tuple([location.split("/")[-1] for location in item.locations]), + "locations": tuple([location.split("/")[-1] for location in item.locations]) + if generate_locations + else tuple(), "status": { "completed": completed, "time": item.viewOffset, @@ -66,7 +78,7 @@ def get_guids(item: Union[Movie, Episode], completed=True): ) # Merge the metadata and guid dictionaries -def get_user_library_watched_show(show): +def get_user_library_watched_show(show, process_episodes, threads=None): try: show_guids: FrozenSet = frozenset( ( @@ -74,31 +86,28 @@ def get_user_library_watched_show(show): "title": show.title, "locations": tuple( [location.split("/")[-1] for location in show.locations] - ), + ) + if generate_locations + else tuple(), } | extract_guids_from_item(show) ).items() # Merge the metadata and guid dictionaries ) - watched_episodes = show.watched() - episode_guids = { - # Offset group data because the first value will be the key - season: [episode[1] for episode in episodes] - for season, episodes - # Group episodes by first element of tuple (episode.parentIndex) - in itertools_groupby( - [ - ( - episode.parentIndex, - get_guids(episode, completed=episode in watched_episodes), - ) - for episode in show.episodes() - # Only include watched or partially-watched more than a minute episodes - if episode in watched_episodes or episode.viewOffset >= 60000 - ], - operator.itemgetter(0), - ) - } + episode_guids_args = [] + + for episode in process_episodes: + episode_guids_args.append([get_guids, episode, episode.isWatched]) + + episode_guids_results = future_thread_executor( + episode_guids_args, threads=threads + ) + + episode_guids = {} + for index, episode in enumerate(process_episodes): + if episode.parentIndex not in episode_guids: + episode_guids[episode.parentIndex] = [] + episode_guids[episode.parentIndex].append(episode_guids_results[index]) return show_guids, episode_guids except Exception: @@ -119,39 
+128,56 @@ def get_user_library_watched(user, user_plex, library): watched = [] args = [ - [get_guids, video, True] - for video - # Get all watched movies - in library_videos.search(unwatched=False) - ] + [ - [get_guids, video, False] - for video - # Get all partially watched movies - in library_videos.search(inProgress=True) - # Only include partially-watched movies more than a minute - if video.viewOffset >= 60000 + [get_guids, video, video.isWatched] + for video in library_videos.search(unwatched=False) + + library_videos.search(inProgress=True) + if video.isWatched or video.viewOffset >= 60000 ] - for guid in future_thread_executor(args, threads=min(os.cpu_count(), 4)): + for guid in future_thread_executor(args, threads=len(args)): logger(f"Plex: Adding {guid['title']} to {user_name} watched list", 3) watched.append(guid) elif library.type == "show": watched = {} # Get all watched shows and partially watched shows - args = [ - (get_user_library_watched_show, show) - for show in library_videos.search(unwatched=False) - + library_videos.search(inProgress=True) - ] + parallel_show_task = [] + parallel_episodes_task = [] - for show_guids, episode_guids in future_thread_executor(args, threads=4): + for show in library_videos.search(unwatched=False) + library_videos.search( + inProgress=True + ): + process_episodes = [] + for episode in show.episodes(): + if episode.isWatched or episode.viewOffset >= 60000: + process_episodes.append(episode) + + # Shows with more than 24 episodes has its episodes processed in parallel + # Shows with less than 24 episodes has its episodes processed in serial but the shows are processed in parallel + if len(process_episodes) >= 24: + parallel_episodes_task.append( + [ + get_user_library_watched_show, + show, + process_episodes, + len(process_episodes), + ] + ) + else: + parallel_show_task.append( + [get_user_library_watched_show, show, process_episodes, 1] + ) + + for show_guids, episode_guids in future_thread_executor( + 
parallel_show_task, threads=len(parallel_show_task) + ) + future_thread_executor(parallel_episodes_task, threads=1): if show_guids and episode_guids: watched[show_guids] = episode_guids logger( f"Plex: Added {episode_guids} to {user_name} {show_guids} watched list", 3, ) + else: watched = None @@ -169,81 +195,87 @@ def get_user_library_watched(user, user_plex, library): def find_video(plex_search, video_ids, videos=None): try: - for location in plex_search.locations: - if ( - contains_nested(location.split("/")[-1], video_ids["locations"]) - is not None - ): - episode_videos = [] - if videos: - for show, seasons in videos.items(): - show = {k: v for k, v in show} - if ( - contains_nested(location.split("/")[-1], show["locations"]) - is not None - ): - for season in seasons.values(): - for episode in season: - episode_videos.append(episode) + if not generate_guids and not generate_locations: + return False, [] - return True, episode_videos - - for guid in plex_search.guids: - guid_source = re.search(r"(.*)://", guid.id).group(1).lower() - guid_id = re.search(r"://(.*)", guid.id).group(1) - - # If show provider source and show provider id are in videos_shows_ids exactly, then the show is in the list - if guid_source in video_ids.keys(): - if guid_id in video_ids[guid_source]: + if generate_locations: + for location in plex_search.locations: + if ( + contains_nested(location.split("/")[-1], video_ids["locations"]) + is not None + ): episode_videos = [] if videos: for show, seasons in videos.items(): show = {k: v for k, v in show} - if guid_source in show.keys(): - if guid_id == show[guid_source]: - for season in seasons.values(): - for episode in season: - episode_videos.append(episode) + if ( + contains_nested( + location.split("/")[-1], show["locations"] + ) + is not None + ): + for season in seasons.values(): + for episode in season: + episode_videos.append(episode) return True, episode_videos + if generate_guids: + for guid in plex_search.guids: + guid_source, 
guid_id = guid.id.split("://") + + # If show provider source and show provider id are in videos_shows_ids exactly, then the show is in the list + if guid_source in video_ids.keys(): + if guid_id in video_ids[guid_source]: + episode_videos = [] + if videos: + for show, seasons in videos.items(): + show = {k: v for k, v in show} + if guid_source in show.keys(): + if guid_id == show[guid_source]: + for season in seasons.values(): + for episode in season: + episode_videos.append(episode) + + return True, episode_videos + return False, [] except Exception: - logger(f"Plex: failed to find library item for {video_ids['title']}", 2) - logger(traceback.format_exc(), 2) return False, [] def get_video_status(plex_search, video_ids, videos): try: - for location in plex_search.locations: - if ( - contains_nested(location.split("/")[-1], video_ids["locations"]) - is not None - ): - for video in videos: - if ( - contains_nested(location.split("/")[-1], video["locations"]) - is not None - ): - return video["status"] + if not generate_guids and not generate_locations: + return None - for guid in plex_search.guids: - guid_source = re.search(r"(.*)://", guid.id).group(1).lower() - guid_id = re.search(r"://(.*)", guid.id).group(1) - - # If show provider source and show provider id are in videos_shows_ids exactly, then the show is in the list - if guid_source in video_ids.keys(): - if guid_id in video_ids[guid_source]: + if generate_locations: + for location in plex_search.locations: + if ( + contains_nested(location.split("/")[-1], video_ids["locations"]) + is not None + ): for video in videos: - if guid_source in video.keys(): - if guid_id == video[guid_source]: - return video["status"] + if ( + contains_nested(location.split("/")[-1], video["locations"]) + is not None + ): + return video["status"] + + if generate_guids: + for guid in plex_search.guids: + guid_source, guid_id = guid.id.split("://") + + # If show provider source and show provider id are in videos_shows_ids exactly, 
then the show is in the list + if guid_source in video_ids.keys(): + if guid_id in video_ids[guid_source]: + for video in videos: + if guid_source in video.keys(): + if guid_id == video[guid_source]: + return video["status"] return None except Exception: - logger(f"Plex: failed to find library item for {video_ids['title']}", 2) - logger(traceback.format_exc(), 2) return None @@ -436,7 +468,6 @@ class Plex: try: # Get all libraries users_watched = {} - args = [] for user in users: if self.admin_user == user: @@ -478,13 +509,12 @@ class Plex: ) continue - args.append([get_user_library_watched, user, user_plex, library]) + user_watched = get_user_library_watched(user, user_plex, library) - for user_watched in future_thread_executor(args): - for user, user_watched_temp in user_watched.items(): - if user not in users_watched: - users_watched[user] = {} - users_watched[user].update(user_watched_temp) + for user_watched, user_watched_temp in user_watched.items(): + if user_watched not in users_watched: + users_watched[user_watched] = {} + users_watched[user_watched].update(user_watched_temp) return users_watched except Exception as e: diff --git a/test/ci1.env b/test/ci1.env new file mode 100644 index 0000000..8c0e0f8 --- /dev/null +++ b/test/ci1.env @@ -0,0 +1,96 @@ +# Global Settings + +## Do not mark any shows/movies as played and instead just output to log if they would have been marked. 
+DRYRUN = "True" + +## Additional logging information +DEBUG = "True" + +## Debugging level, "info" is default, "debug" is more verbose +DEBUG_LEVEL = "debug" + +## If set to true then the script will only run once and then exit +RUN_ONLY_ONCE = "True" + +## How often to run the script in seconds +SLEEP_DURATION = 10 + +## Log file where all output will be written to +LOG_FILE = "log.log" + +## Mark file where all shows/movies that have been marked as played will be written to +MARK_FILE = "mark.log" + +## Timeout for requests for jellyfin +REQUEST_TIMEOUT = 300 + +## Max threads for processing +MAX_THREADS = 2 + +## Generate guids +## Generating guids is a slow process, so this is a way to speed up the process +## by using the location only, useful when using same files on multiple servers +GENERATE_GUIDS = "False" + +## Generate locations +## Generating locations is a slow process, so this is a way to speed up the process +## by using the guid only, useful when using different files on multiple servers +GENERATE_LOCATIONS = "True" + +## Map usernames between servers in the event that they are different, order does not matter +## Comma separated for multiple options +USER_MAPPING = {"JellyUser":"jellyplex_watched"} + +## Map libraries between servers in the event that they are different, order does not matter +## Comma separated for multiple options +LIBRARY_MAPPING = { "Shows": "TV Shows" } + + +## Blacklisting/Whitelisting libraries, library types such as Movies/TV Shows, and users. Mappings apply so if the mapping for the user or library exists then both will be excluded. 
+## Comma separated for multiple options +#BLACKLIST_LIBRARY = "" +#WHITELIST_LIBRARY = "Movies" +#BLACKLIST_LIBRARY_TYPE = "Series" +#WHITELIST_LIBRARY_TYPE = "Movies, movie" +#BLACKLIST_USERS = "" +WHITELIST_USERS = "jellyplex_watched" + + + +# Plex + +## Recommended to use token as it is faster to connect as it is direct to the server instead of going through the plex servers +## URL of the plex server, use hostname or IP address if the hostname is not resolving correctly +## Comma separated list for multiple servers +PLEX_BASEURL = "https://localhost:32400" + +## Plex token https://support.plex.tv/articles/204059436-finding-an-authentication-token-x-plex-token/ +## Comma separated list for multiple servers +PLEX_TOKEN = "mVaCzSyd78uoWkCBzZ_Y" + +## If not using plex token then use username and password of the server admin along with the servername +## Comma separated for multiple options +#PLEX_USERNAME = "PlexUser, PlexUser2" +#PLEX_PASSWORD = "SuperSecret, SuperSecret2" +#PLEX_SERVERNAME = "Plex Server1, Plex Server2" + +## Skip hostname validation for ssl certificates. +## Set to True if running into ssl certificate errors +SSL_BYPASS = "True" + +## control the direction of syncing. e.g. SYNC_FROM_PLEX_TO_JELLYFIN set to true will cause the updates from plex +## to be updated in jellyfin. 
SYNC_FROM_PLEX_TO_PLEX set to true will sync updates between multiple plex servers +SYNC_FROM_PLEX_TO_JELLYFIN = "True" +SYNC_FROM_JELLYFIN_TO_PLEX = "True" +SYNC_FROM_PLEX_TO_PLEX = "True" +SYNC_FROM_JELLYFIN_TO_JELLYFIN = "True" + +# Jellyfin + +## Jellyfin server URL, use hostname or IP address if the hostname is not resolving correctly +## Comma separated list for multiple servers +JELLYFIN_BASEURL = "http://localhost:8096" + +## Jellyfin api token, created manually by logging in to the jellyfin server admin dashboard and creating an api key +## Comma separated list for multiple servers +JELLYFIN_TOKEN = "d773c4db3ecc4b028fc0904d9694804c" diff --git a/test/ci2.env b/test/ci2.env new file mode 100644 index 0000000..90c69bb --- /dev/null +++ b/test/ci2.env @@ -0,0 +1,96 @@ +# Global Settings + +## Do not mark any shows/movies as played and instead just output to log if they would have been marked. +DRYRUN = "True" + +## Additional logging information +DEBUG = "True" + +## Debugging level, "info" is default, "debug" is more verbose +DEBUG_LEVEL = "debug" + +## If set to true then the script will only run once and then exit +RUN_ONLY_ONCE = "True" + +## How often to run the script in seconds +SLEEP_DURATION = 10 + +## Log file where all output will be written to +LOG_FILE = "log.log" + +## Mark file where all shows/movies that have been marked as played will be written to +MARK_FILE = "mark.log" + +## Timeout for requests for jellyfin +REQUEST_TIMEOUT = 300 + +## Max threads for processing +MAX_THREADS = 2 + +## Generate guids +## Generating guids is a slow process, so this is a way to speed up the process +## by using the location only, useful when using same files on multiple servers +GENERATE_GUIDS = "True" + +## Generate locations +## Generating locations is a slow process, so this is a way to speed up the process +## by using the guid only, useful when using different files on multiple servers +GENERATE_LOCATIONS = "False" + +## Map usernames between servers in 
the event that they are different, order does not matter +## Comma separated for multiple options +USER_MAPPING = {"JellyUser":"jellyplex_watched"} + +## Map libraries between servers in the event that they are different, order does not matter +## Comma separated for multiple options +LIBRARY_MAPPING = { "Shows": "TV Shows" } + + +## Blacklisting/Whitelisting libraries, library types such as Movies/TV Shows, and users. Mappings apply so if the mapping for the user or library exists then both will be excluded. +## Comma separated for multiple options +#BLACKLIST_LIBRARY = "" +#WHITELIST_LIBRARY = "Movies" +#BLACKLIST_LIBRARY_TYPE = "Series" +#WHITELIST_LIBRARY_TYPE = "Movies, movie" +#BLACKLIST_USERS = "" +WHITELIST_USERS = "jellyplex_watched" + + + +# Plex + +## Recommended to use token as it is faster to connect as it is direct to the server instead of going through the plex servers +## URL of the plex server, use hostname or IP address if the hostname is not resolving correctly +## Comma separated list for multiple servers +PLEX_BASEURL = "https://localhost:32400" + +## Plex token https://support.plex.tv/articles/204059436-finding-an-authentication-token-x-plex-token/ +## Comma separated list for multiple servers +PLEX_TOKEN = "mVaCzSyd78uoWkCBzZ_Y" + +## If not using plex token then use username and password of the server admin along with the servername +## Comma separated for multiple options +#PLEX_USERNAME = "PlexUser, PlexUser2" +#PLEX_PASSWORD = "SuperSecret, SuperSecret2" +#PLEX_SERVERNAME = "Plex Server1, Plex Server2" + +## Skip hostname validation for ssl certificates. +## Set to True if running into ssl certificate errors +SSL_BYPASS = "True" + +## control the direction of syncing. e.g. SYNC_FROM_PLEX_TO_JELLYFIN set to true will cause the updates from plex +## to be updated in jellyfin. 
SYNC_FROM_PLEX_TO_PLEX set to true will sync updates between multiple plex servers +SYNC_FROM_PLEX_TO_JELLYFIN = "True" +SYNC_FROM_JELLYFIN_TO_PLEX = "True" +SYNC_FROM_PLEX_TO_PLEX = "True" +SYNC_FROM_JELLYFIN_TO_JELLYFIN = "True" + +# Jellyfin + +## Jellyfin server URL, use hostname or IP address if the hostname is not resolving correctly +## Comma separated list for multiple servers +JELLYFIN_BASEURL = "http://localhost:8096" + +## Jellyfin api token, created manually by logging in to the jellyfin server admin dashboard and creating an api key +## Comma separated list for multiple servers +JELLYFIN_TOKEN = "d773c4db3ecc4b028fc0904d9694804c" diff --git a/test/ci.env b/test/ci3.env similarity index 88% rename from test/ci.env rename to test/ci3.env index ba9bd28..a6ca241 100644 --- a/test/ci.env +++ b/test/ci3.env @@ -27,6 +27,16 @@ REQUEST_TIMEOUT = 300 ## Max threads for processing MAX_THREADS = 2 +## Generate guids +## Generating guids is a slow process, so this is a way to speed up the process +## by using the location only, useful when using same files on multiple servers +GENERATE_GUIDS = "True" + +## Generate locations +## Generating locations is a slow process, so this is a way to speed up the process +## by using the guid only, useful when using different files on multiple servers +GENERATE_LOCATIONS = "True" + ## Map usernames between servers in the event that they are different, order does not matter ## Comma seperated for multiple options USER_MAPPING = {"JellyUser":"jellyplex_watched"} diff --git a/test/validate_ci_marklog.py b/test/validate_ci_marklog.py index 45f69bf..180b65c 100644 --- a/test/validate_ci_marklog.py +++ b/test/validate_ci_marklog.py @@ -60,6 +60,9 @@ def main(): "JellyUser/Shows/Monarch: Legacy of Monsters/Secrets and Lies", ] + # Triple the expected values because the CI runs three times + expected_values = expected_values * 3 + lines = read_marklog() if not check_marklog(lines, expected_values): print("Failed to validate marklog")