diff --git a/maps/SOI/compress.py b/maps/SOI/compress.py index 87c1a4e..8963dc1 100644 --- a/maps/SOI/compress.py +++ b/maps/SOI/compress.py @@ -339,7 +339,7 @@ def close(self): self.file_fp = None def run(self): - sheet_no = file_p.name.replace('.pdf', '') + sheet_no = Path(filename).name.replace('.pdf', '') export_file = export_dir / f'{sheet_no}.jpg' if export_file.exists(): return @@ -401,10 +401,10 @@ def download_from_github(p): if __name__ == '__main__': import sys - from_list_file = Path(sys.srgv[1]) + from_list_file = Path(sys.argv[1]) from_list = from_list_file.read_text().split('\n') - from_list = [ f.strip() for f in from_list ] + from_list = [ f.strip() for f in from_list if f.strip() != '' ] total = len(from_list) special_cases = {} @@ -417,7 +417,7 @@ def download_from_github(p): for sheet in from_list: count += 1 filename = f'data/raw/{sheet}.pdf' - print('handling {sheet=} {count}/{total}') + print(f'handling {sheet=} {count}/{total}') extra, extra_ancillary = get_extra(special_cases, filename) converter = Converter(filename, extra, extra_ancillary) converter.run() diff --git a/maps/SOI/util/get_work_for_compressor.sh b/maps/SOI/util/get_work_for_compressor.sh index 7dfad28..86844bc 100755 --- a/maps/SOI/util/get_work_for_compressor.sh +++ b/maps/SOI/util/get_work_for_compressor.sh @@ -5,7 +5,7 @@ wget https://storage.googleapis.com/soi_data/compressed/list.txt -O jpgs_list.tx comm <(cat pdfs_list.txt| cut -d" " -f2 | sort) <(cat jpgs_list.txt | sort) | cut -f1 | grep "^[0-9]" > temp.txt -uv run python -c "from known_problems import known_problems as kp; l = [ k.replace('data/raw/', '').replace('.pdf', '') for k in kp ]; print('\n'.join(l) + '\n')" > kp.txt +uv run python -c "from known_problems import known_problems as kp; l = [ k.replace('data/raw/', '').replace('.pdf', '') for k in kp ]; print('\n'.join(l))" > kp.txt comm <(cat temp.txt | sort) <(cat kp.txt | sort) | cut -f1 | grep "^[0-9]" > $1 diff --git a/maps/SOI/util/get_work_for_parser.sh b/maps/SOI/util/get_work_for_parser.sh index abb6ecb..928891f 100755 --- a/maps/SOI/util/get_work_for_parser.sh +++ b/maps/SOI/util/get_work_for_parser.sh @@ -5,7 +5,7 @@ gh release download soi-tiffs -p list.txt -O tiffs_list.txt --clobber comm <(cat pdfs_list.txt| cut -d" " -f2 | sort) <(cat tiffs_list.txt | cut -d" " -f2 | sort) | cut -f1 | grep "^[0-9]" > temp.txt -uv run python -c "from known_problems import known_problems as kp; l = [ k.replace('data/raw/', '').replace('.pdf', '') for k in kp ]; print('\n'.join(l) + '\n')" > kp.txt +uv run python -c "from known_problems import known_problems as kp; l = [ k.replace('data/raw/', '').replace('.pdf', '') for k in kp ]; print('\n'.join(l))" > kp.txt comm <(cat temp.txt | sort) <(cat kp.txt | sort) | cut -f1 | grep "^[0-9]" > $1