Skip to content

Commit

Permalink
Rev 715: Implement batch download using tag list file
Browse files Browse the repository at this point in the history
  • Loading branch information
trickerer01 committed Dec 17, 2024
1 parent d8cdf9e commit 0e02452
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 30 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ Note that Ruxx does not restrict your searches to a couple pages or something. Y
- **Connection -> Cache processed HTML** ‒ by default HTML is cached as raw bytes, enabling this makes Ruxx cache HTML after it was processed into manageable form - a little bit faster but consumes much more memory. Mainly affects RS module
- **Actions -> Download** \<Ctrl+Shift+D> ‒ same as download button
- **Actions -> Check tags** \<Ctrl+Shift+C> ‒ same as check tags button
- **Actions -> Batch download using tag list...** ‒ reads tag lists from a text file and processes them one by one: each line is put into the **Tags** field and downloaded. Warning: the download starts immediately! Adjust settings and download options beforehand
- **Actions -> Clear log** \<Ctrl+Shift+E> ‒ same as clear log button
- **Tools -> Load from ID list**Allows you to load **ID** tag list from a text file. The resulting tags will look like `(id:x~id:y~id:z)` which is an ***OR*** group [expression](#tags-syntax), effectively allowing you to search for those ids. ~~Broken since about 10.07.2021. Refer to "Broken things" RX forum subsection for details.~~ Re-enabled since version `1.1.284` for all modules using a workaround, but doesn't run in parallel so be aware of that
- **Tools -> Load from ID list** ‒ loads an **ID** tag list from a text file. The resulting tags will look like `(id:x~id:y~id:z)` which is an ***OR*** group [expression](#tags-syntax), effectively allowing you to search for those ids. ~~Broken since about 10.07.2021. Refer to "Broken things" RX forum subsection for details.~~ Re-enabled since version `1.1.284` for all modules using a workaround, but doesn't run in parallel so be aware of that
- **Tools -> Un-tag files...** ‒ renames selected Ruxx-downloaded media files, stripping file names of all extra info
- **Tools -> Re-tag files...** ‒ renames selected Ruxx-downloaded media files, re-appending extra info. You'll need dumped tags info file(s) (see **Edit -> Save tags**)
- **Tools -> Sort files into subfolders...** ‒ a set of tools to separate downloaded files if need be:
Expand Down
26 changes: 22 additions & 4 deletions src/app_file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
)
from app_module import ProcModule

__all__ = ('prepare_tags_list',)
__all__ = ('prepare_id_list', 'prepare_tag_lists')

re_comments = re_compile(r'^(?:--|//|#).*?$')
re_separators = re_compile(r'(?:, *| +)')
Expand Down Expand Up @@ -69,7 +69,7 @@ def id_list_from_string(id_str: str) -> list[str]:
return id_str.strip().split(' ')


def parse_file(filepath: str) -> tuple[bool, list[str]]:
def parse_ids_file(filepath: str) -> tuple[bool, list[str]]:
id_list = list()
try:
for line in open(filepath, 'rt', encoding=UTF8).readlines():
Expand All @@ -84,10 +84,28 @@ def parse_file(filepath: str) -> tuple[bool, list[str]]:
return False, id_list


def prepare_tags_list(filepath: str) -> tuple[bool, str]:
suc, id_list = parse_file(filepath)
def prepare_id_list(filepath: str) -> tuple[bool, str]:
    """Build a single parenthesized, '~'-joined tags string from an ids file.

    :param filepath: path handed over to ``parse_ids_file``.
    :return: the success flag reported by ``parse_ids_file`` and the parsed
        entries joined with ``~`` and wrapped in parentheses.
    """
    parsed_ok, parsed_ids = parse_ids_file(filepath)
    or_group = f'({"~".join(parsed_ids)})'
    return parsed_ok, or_group


def parse_tags_file(filepath: str) -> tuple[bool, list[str]]:
    """Read a batch tag list file, one tags string per line.

    Every line is stripped of spaces, newlines and a BOM character; empty
    lines and lines fully matched by ``re_comments`` are skipped. The
    remaining lines are passed through ``unique_everseen`` (presumably an
    order-preserving de-duplication -- confirm against its definition).

    :param filepath: path to the text file, decoded using the ``UTF8`` constant.
    :return: ``(True, lines)`` on success, or ``(False, lines_collected_so_far)``
        if opening, decoding or processing the file raised.
    """
    tag_list: list[str] = list()
    try:
        # The context manager closes the handle even when decoding fails midway;
        # iterating the file object avoids materializing all lines up front.
        with open(filepath, 'rt', encoding=UTF8) as tags_file:
            for line in tags_file:
                line = line.strip(' \n\ufeff')
                if len(line) == 0 or re_comments.fullmatch(line):
                    continue
                tag_list.append(line)
        return True, list(unique_everseen(tag_list))
    except Exception:
        # Deliberately best-effort: failure is reported through the flag
        # rather than by raising.
        return False, tag_list


def prepare_tag_lists(filepath: str) -> tuple[bool, list[str]]:
    """Load batch tag lists from ``filepath``.

    Thin wrapper over ``parse_tags_file``.

    :param filepath: path to the tag list text file.
    :return: the ``(success, tag_lists)`` pair produced by ``parse_tags_file``.
    """
    return parse_tags_file(filepath)

#
#
#########################################
107 changes: 91 additions & 16 deletions src/app_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import sys
from collections.abc import Callable
from datetime import datetime
from multiprocessing.dummy import current_process
from os import path, system, makedirs, remove, replace, getcwd
from threading import Thread
from time import sleep as thread_sleep
Expand All @@ -37,7 +38,7 @@
set_console_shown, unfocus_buttons_once, help_tags, help_about, load_id_list, browse_path, register_menu_command, toggle_console,
register_submenu_command, register_menu_checkbutton, register_menu_radiobutton, register_submenu_radiobutton, register_menu_separator,
get_all_media_files_in_cur_dir, update_lastpath, toggle_autocompletion, trigger_autocomplete_tag, hotkey_text, config_menu,
get_media_files_dir,
get_media_files_dir, load_batch_download_tag_list,
)
from app_gui_defines import (
STATE_DISABLED, STATE_NORMAL, COLOR_WHITE, COLOR_BROWN1, COLOR_PALEGREEN, OPTION_VALUES_VIDEOS, OPTION_VALUES_IMAGES,
Expand All @@ -61,6 +62,7 @@
__all__ = ('run_ruxx_gui',)

# loaded
batch_download_thread: Thread | None = None
download_thread: Thread | None = None
tags_check_thread: Thread | None = None
duplicates_check_thread: Thread | None = None
Expand Down Expand Up @@ -336,7 +338,9 @@ def set_proc_module(dwnmodule: int) -> None:


def update_widget_enabled_states() -> None:
batching = is_processing_batch()
downloading = is_downloading()
batching_or_downloading = batching or downloading
checkingtags = is_cheking_tags()
checkingdupes = is_checking_duplicates()
i: Menus
Expand All @@ -361,7 +365,7 @@ def update_widget_enabled_states() -> None:
elif i == Menus.TOOLS and j == SubMenus.IDLIST and ProcModule.is_rz():
newstate = STATE_DISABLED
else:
newstate = STATE_DISABLED if downloading else menu_item_orig_states[i][j]
newstate = STATE_DISABLED if batching_or_downloading else menu_item_orig_states[i][j]
config_menu(i, j, state=newstate)
gi: Globals
for gi in [g for g in Globals.__members__.values() if g < Globals.MAX_GOBJECTS]:
Expand Down Expand Up @@ -675,6 +679,10 @@ def recheck_args() -> tuple[bool, str]:
return True, ''


def is_processing_batch() -> bool:
    """Tell whether a batch download thread exists and is still alive."""
    if batch_download_thread is None:
        return False
    return batch_download_threadm().is_alive()


def is_downloading() -> bool:
    """Tell whether a download thread exists and is still alive."""
    if download_thread is None:
        return False
    return download_threadm().is_alive()

Expand All @@ -688,60 +696,119 @@ def is_checking_duplicates() -> bool:


def update_download_state() -> None:
global batch_download_thread
global download_thread
global prev_download_state

batching = is_processing_batch()
downloading = is_downloading()
batching_or_downloading = batching or downloading
checkingtags = is_cheking_tags()
checkingdupes = is_checking_duplicates()
if prev_download_state != downloading:
if prev_download_state != batching_or_downloading:
update_widget_enabled_states()
gi: Globals
for gi in [g for g in Globals.__members__.values() if g < Globals.MAX_GOBJECTS]:
if gi in (Globals.MODULE_ICON,):
pass # config_global(i, state=gobject_orig_states[i])
elif gi == Globals.BUTTON_DOWNLOAD:
if not downloading:
if not batching_or_downloading:
config_global(gi, state=(STATE_DISABLED if checkingdupes else gobject_orig_states[gi]))
elif gi == Globals.BUTTON_CHECKTAGS:
if not checkingtags:
config_global(gi, state=(STATE_DISABLED if downloading or checkingdupes else gobject_orig_states[gi]))
config_global(gi, state=(STATE_DISABLED if batching_or_downloading or checkingdupes else gobject_orig_states[gi]))
else:
config_global(gi, state=(STATE_DISABLED if downloading else gobject_orig_states[gi]))
config_global(gi, state=(STATE_DISABLED if batching_or_downloading else gobject_orig_states[gi]))
# special case 1: _download button: turn into cancel button
dw_button = get_global(Globals.BUTTON_DOWNLOAD)
if downloading:
if batching_or_downloading:
dw_button.config(text='Cancel', command=cancel_download)
else:
dw_button.config(text='Download', command=do_download)

if not downloading and (download_thread is not None):
download_threadm().join() # make thread terminate
del download_thread
download_thread = None
if not batching_or_downloading:
if batch_download_thread is not None:
batch_download_threadm().join() # make thread terminate
del batch_download_thread
batch_download_thread = None
if download_thread is not None:
download_threadm().join() # make thread terminate
del download_thread
download_thread = None

prev_download_state = downloading
prev_download_state = batching_or_downloading

rootm().after(int(THREAD_CHECK_PERIOD_DEFAULT), update_download_state)


def cancel_download() -> None:
    """Flag the running batch and/or download thread as killed.

    For each of the two workers (batch first, then download) that is
    currently alive, sets its ``killed`` attribute to ``True``.
    """
    workers = (
        (is_processing_batch, batch_download_threadm),
        (is_downloading, download_threadm),
    )
    for is_running, get_thread in workers:
        if is_running():
            get_thread().killed = True


def do_process_batch() -> None:
    """Ask for a tag list file and start processing it in a batch thread.

    Does nothing if a batch is already being processed or if no tag lists
    were loaded. Otherwise creates the batch thread running
    ``start_batch_download_thread``, resets its ``killed`` flag and starts it.
    """
    global batch_download_thread

    if not is_processing_batch():
        tag_lines = load_batch_download_tag_list()
        if tag_lines:
            batch_download_thread = Thread(target=start_batch_download_thread, args=(tag_lines,))
            worker = batch_download_threadm()
            worker.killed = False
            worker.start()


def start_batch_download_thread(cmdlines: list[str]) -> None:
    """Validate and then download every tags string in ``cmdlines``, one after another.

    Used as the target of the batch thread created in ``do_process_batch``.
    All strings are checked with ``parse_tags`` first; if any of them is
    invalid an error box is shown and nothing is downloaded. Otherwise each
    string is written into the tags option, ``do_download()`` is called and
    the resulting download thread is joined before moving on.

    :param cmdlines: tags strings to process, one download per element.
    """
    # Pass 1: validation only. Collect up to 5 error messages, then add an ellipsis and stop.
    cmdline_errors = list[str]()
    for cmdline1 in cmdlines:
        parse_result, _ = parse_tags(cmdline1)
        if not parse_result:
            cmdline_errors.append(f'Invalid tags: \'{cmdline1}\'')
            if len(cmdline_errors) >= 5:
                cmdline_errors.append('...')
                break
    if cmdline_errors:
        messagebox.showerror('Nope', '\n'.join(cmdline_errors))
        return

    n = '\n '
    trace(f'\n[batcher] Processing {len(cmdlines):d} tag lists:{n}{n.join(cmdlines)}')
    unfocus_buttons_once()
    processed_count = 0
    # Pass 2: sequential downloads.
    for idx, cmdline2 in enumerate(cmdlines):
        # The tags field is switched to normal state just for the write and disabled again afterwards
        # (presumably because it is kept disabled while a batch is running -- TODO confirm)
        config_global(Globals.FIELD_TAGS, state=STATE_NORMAL)
        setrootconf(Options.TAGS, cmdline2)
        config_global(Globals.FIELD_TAGS, state=STATE_DISABLED)
        do_download()
        # A missing or non-running download thread right after do_download() is treated as a failure.
        # NOTE(review): a download thread that finishes before this check would also land here -- confirm this cannot happen
        if download_thread is None or not download_threadm().is_alive():
            messagebox.showerror('Nope', f'Error processing tag list {idx + 1:d}: \'{cmdline2}\'!')
            break
        download_threadm().join()
        # multiprocessing.dummy.current_process is threading.current_thread, so this reads
        # the 'killed' attribute of the batch thread itself (set by cancel_download())
        if getattr(current_process(), 'killed', False) is True:
            break
        processed_count += 1

    trace(f'\n[batcher] Successfully processed {processed_count:d} / {len(cmdlines):d} {ProcModule.name().upper()} tag lists')


def do_download() -> None:
global download_thread

if is_menu_disabled(Menus.ACTIONS, SubMenus.DOWNLOAD):
if is_downloading():
return

suc, msg = recheck_args()
if not suc:
messagebox.showwarning('Nope', msg)
return

get_global(Globals.BUTTON_DOWNLOAD).focus_force()
if not is_processing_batch():
get_global(Globals.BUTTON_DOWNLOAD).focus_force()

# force cmd line update
update_frame_cmdline()
Expand All @@ -761,7 +828,8 @@ def do_download() -> None:
download_threadm().gui = True
download_threadm().start()

unfocus_buttons_once()
if not is_processing_batch():
unfocus_buttons_once()


def start_download_thread(cmdline: list[str]) -> None:
Expand Down Expand Up @@ -835,11 +903,13 @@ def init_menus() -> None:
register_menu_checkbutton('Download without proxy', CVARS[Options.PROXY_NO_DOWNLOAD])
register_menu_checkbutton('Ignore proxy', CVARS[Options.IGNORE_PROXY])
register_menu_checkbutton('Cache processed HTML', CVARS[Options.CACHE_PROCCED_HTML])
# 6) Action
# 6) Actions
register_menu('Actions', Menus.ACTIONS)
register_menu_command('Download', do_download, Options.ACTION_DOWNLOAD, True)
register_menu_command('Check tags', check_tags_direct, Options.ACTION_CHECKTAGS, True)
register_menu_separator()
register_menu_command('Batch download using tag list...', do_process_batch, Options.ACTION_DOWNLOAD_BATCH)
register_menu_separator()
register_menu_command('Clear log', Logger.wnd.clear, Options.ACTION_CLEARLOG, True)
# 7) Tools
register_menu('Tools', Menus.TOOLS)
Expand Down Expand Up @@ -933,6 +1003,11 @@ def init_gui() -> None:


# Helper wrappers: solve unnecessary NoneType warnings
def batch_download_threadm() -> Thread:
    """Return the module-level ``batch_download_thread``, asserting that it is set.

    Gives callers a non-optional ``Thread`` to work with; check for ``None``
    before calling (the assertion is not evaluated when Python runs with ``-O``).
    """
    assert batch_download_thread
    return batch_download_thread


def download_threadm() -> Thread:
    """Return the module-level ``download_thread``, asserting that it is set.

    Gives callers a non-optional ``Thread`` to work with; check for ``None``
    before calling (the assertion is not evaluated when Python runs with ``-O``).
    """
    assert download_thread
    return download_thread
Expand Down
23 changes: 17 additions & 6 deletions src/app_gui_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
PROXY_DEFAULT_STR, USER_AGENT, PROGRESS_BAR_MAX, PLATFORM_WINDOWS, DATE_MIN_DEFAULT, CONNECT_TIMEOUT_BASE, DATE_MAX_DEFAULT,
KNOWN_EXTENSIONS_STR, CONNECT_RETRIES_BASE, SITENAME_B_RX, SITENAME_B_RN, SITENAME_B_RS, SITENAME_B_RZ, SITENAME_B_RP, SITENAME_B_EN,
)
from app_file_parser import prepare_tags_list
from app_file_parser import prepare_id_list, prepare_tag_lists
from app_file_sorter import FileTypeFilter
from app_gui_defines import (
BUT_ESCAPE, BUT_RETURN, STATE_READONLY, STATE_DISABLED, TOOLTIP_DELAY_DEFAULT, FONT_SANS_SMALL, COLOR_LIGHTGRAY, STATE_NORMAL,
Expand Down Expand Up @@ -58,10 +58,10 @@
'setrootconf', 'int_vars', 'rootm', 'getrootconf', 'window_hcookiesm', 'window_proxym', 'window_timeoutm', 'window_retriesm',
'register_menu', 'register_submenu', 'GetRoot', 'create_base_window_widgets', 'text_cmdm', 'get_icon', 'init_additional_windows',
'get_global', 'config_global', 'is_global_disabled', 'is_menu_disabled', 'is_focusing', 'toggle_console', 'hotkey_text',
'get_curdir', 'set_console_shown', 'unfocus_buttons_once', 'help_tags', 'help_about', 'load_id_list', 'ask_filename', 'browse_path',
'register_menu_command', 'register_submenu_command', 'register_menu_checkbutton', 'register_menu_radiobutton',
'register_submenu_radiobutton', 'register_menu_separator', 'get_all_media_files_in_cur_dir', 'get_media_files_dir',
'update_lastpath', 'config_menu', 'toggle_autocompletion', 'trigger_autocomplete_tag',
'get_curdir', 'set_console_shown', 'unfocus_buttons_once', 'help_tags', 'help_about', 'load_id_list', 'load_batch_download_tag_list',
'ask_filename', 'browse_path', 'register_menu_command', 'register_submenu_command', 'register_menu_checkbutton',
'register_menu_radiobutton', 'register_submenu_radiobutton', 'register_menu_separator', 'get_all_media_files_in_cur_dir',
'get_media_files_dir', 'update_lastpath', 'config_menu', 'toggle_autocompletion', 'trigger_autocomplete_tag',
)


Expand Down Expand Up @@ -1396,7 +1396,7 @@ def help_about(title: str = f'About {APP_NAME}', message: str = ABOUT_MSG) -> No
def load_id_list() -> None:
filepath = ask_filename((('Text files', '*.txt'), ('All files', '*.*')))
if filepath:
success, file_tags = prepare_tags_list(filepath)
success, file_tags = prepare_id_list(filepath)
if success:
setrootconf(Options.TAGS, file_tags)
# reset settings for immediate downloading
Expand All @@ -1406,6 +1406,17 @@ def load_id_list() -> None:
messagebox.showwarning(message=f'Unable to load ids from {filepath[filepath.rfind("/") + 1:]}!')


def load_batch_download_tag_list() -> list[str]:
    """Ask the user for a text file and return the tag lists read from it.

    :return: the lists produced by ``prepare_tag_lists`` on success; an empty
        list if no file was chosen or if loading failed (a warning box is
        shown in the latter case).
    """
    filepath = ask_filename((('Text files', '*.txt'), ('All files', '*.*')))
    if not filepath:
        return []

    success, file_tag_lists = prepare_tag_lists(filepath)
    if not success:
        # only the part after the last '/' is shown to the user
        messagebox.showwarning(message=f'Unable to load tags from {filepath[filepath.rfind("/") + 1:]}!')
        return []

    return file_tag_lists


def ask_filename(ftypes: Iterable[tuple[str, str]]) -> str:
fullpath = filedialog.askopenfilename(filetypes=ftypes, initialdir=get_curdir())
if fullpath and len(fullpath) > 0:
Expand Down
6 changes: 4 additions & 2 deletions src/app_gui_defines.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ class Options(IntEnum):
AUTOCOMPLETION_ENABLE = auto()
TAGLISTS_PATH = auto()
ACTION_DOWNLOAD = auto() # unbound, internal
ACTION_DOWNLOAD_BATCH = auto() # unbound, internal
ACTION_CHECKTAGS = auto() # unbound, internal
ACTION_CLEARLOG = auto() # unbound, internal
ACTION_OPEN_DWN_FOLDER = auto() # unbound, internal
Expand Down Expand Up @@ -350,12 +351,13 @@ def __str__(self) -> str:


# submenus with changing states
# TODO: automate this through 'register(sub)menuX' funcs
class SubMenus(IntEnum):
SAVE, LOAD, RESET, OPENFOLDER = 0, 1, 3, 5
PREFIX, STAGS, SSOURCE, SCOMMENTS, SMODE, EXTEND, WNONEMPTY, VERBOSE = 0, 2, 3, 4, 5, 7, 8, 9
RX, RN, RS, RZ, RP, EN = 0, 1, 2, 3, 4, 5
HEADERS, PROXY, TIMEOUT, RETRIES, DWPROXY, IGNOREPROXY, CACHEMODE = 0, 1, 2, 3, 4, 5, 6
DOWNLOAD, CHECKTAGS, CLEARLOG = 0, 1, 3
DOWNLOAD, CHECKTAGS, DBATCH, CLEARLOG = 0, 1, 3, 5
IDLIST, UNTAG, RETAG, SORT, DUPLICATES, AUTOCOMPLETEE, AUTOCOMPLETER = 0, 2, 3, 5, 7, 9, 10
DFULL, DSKIP, DTOUCH = 0, 1, 2

Expand All @@ -379,7 +381,7 @@ def __bool__(self) -> bool:
Menus.MODULE: RuxxMenu(SubMenus.RX, SubMenus.RN, SubMenus.RS, SubMenus.RZ, SubMenus.RP, SubMenus.EN),
Menus.CONNECTION: RuxxMenu(SubMenus.HEADERS, SubMenus.PROXY, SubMenus.TIMEOUT, SubMenus.RETRIES, SubMenus.DWPROXY,
SubMenus.IGNOREPROXY, SubMenus.CACHEMODE),
Menus.ACTIONS: RuxxMenu(SubMenus.DOWNLOAD, SubMenus.CHECKTAGS),
Menus.ACTIONS: RuxxMenu(SubMenus.DOWNLOAD, SubMenus.CHECKTAGS, SubMenus.DBATCH),
Menus.TOOLS: RuxxMenu(SubMenus.IDLIST, SubMenus.UNTAG, SubMenus.RETAG, SubMenus.SORT, SubMenus.DUPLICATES,
SubMenus.AUTOCOMPLETEE, SubMenus.AUTOCOMPLETER),
Menus.DEBUG: RuxxMenu(SubMenus.DFULL, SubMenus.DSKIP, SubMenus.DTOUCH),
Expand Down
2 changes: 1 addition & 1 deletion src/app_revision.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
APP_NAME = 'Ruxx'
APP_VER_MAJOR = '1'
APP_VER_SUB = '6'
APP_REVISION = '714'
APP_REVISION = '715'
APP_IS_BETA = False
APP_IS_BETA_TEXT = 'b' * APP_IS_BETA
APP_REV_DATE = '17 Dec 2024'
Expand Down

0 comments on commit 0e02452

Please sign in to comment.