Skip to content

Commit

Permalink
SOI: more fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
ramSeraph committed Mar 2, 2025
1 parent 7ff354c commit 0bbfc1b
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
8 changes: 4 additions & 4 deletions maps/SOI/compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ def close(self):
self.file_fp = None

def run(self):
- sheet_no = file_p.name.replace('.pdf', '')
+ sheet_no = Path(filename).name.replace('.pdf', '')
export_file = export_dir / f'{sheet_no}.jpg'
if export_file.exists():
return
Expand Down Expand Up @@ -401,10 +401,10 @@ def download_from_github(p):

if __name__ == '__main__':
import sys
- from_list_file = Path(sys.srgv[1])
+ from_list_file = Path(sys.argv[1])

from_list = from_list_file.read_text().split('\n')
- from_list = [ f.strip() for f in from_list ]
+ from_list = [ f.strip() for f in from_list if f.strip() != '' ]
total = len(from_list)

special_cases = {}
Expand All @@ -417,7 +417,7 @@ def download_from_github(p):
for sheet in from_list:
count += 1
filename = f'data/raw/{sheet}.pdf'
- print('handling {sheet=} {count}/{total}')
+ print(f'handling {sheet=} {count}/{total}')
extra, extra_ancillary = get_extra(special_cases, filename)
converter = Converter(filename, extra, extra_ancillary)
converter.run()
Expand Down
2 changes: 1 addition & 1 deletion maps/SOI/util/get_work_for_compressor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ wget https://storage.googleapis.com/soi_data/compressed/list.txt -O jpgs_list.tx

comm <(cat pdfs_list.txt| cut -d" " -f2 | sort) <(cat jpgs_list.txt | sort) | cut -f1 | grep "^[0-9]" > temp.txt

- uv run python -c "from known_problems import known_problems as kp; l = [ k.replace('data/raw/', '').replace('.pdf', '') for k in kp ]; print('\n'.join(l) + '\n')" > kp.txt
+ uv run python -c "from known_problems import known_problems as kp; l = [ k.replace('data/raw/', '').replace('.pdf', '') for k in kp ]; print('\n'.join(l))" > kp.txt


comm <(cat temp.txt | sort) <(cat kp.txt | sort) | cut -f1 | grep "^[0-9]" > $1
Expand Down
2 changes: 1 addition & 1 deletion maps/SOI/util/get_work_for_parser.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ gh release download soi-tiffs -p list.txt -O tiffs_list.txt --clobber

comm <(cat pdfs_list.txt| cut -d" " -f2 | sort) <(cat tiffs_list.txt | cut -d" " -f2 | sort) | cut -f1 | grep "^[0-9]" > temp.txt

- uv run python -c "from known_problems import known_problems as kp; l = [ k.replace('data/raw/', '').replace('.pdf', '') for k in kp ]; print('\n'.join(l) + '\n')" > kp.txt
+ uv run python -c "from known_problems import known_problems as kp; l = [ k.replace('data/raw/', '').replace('.pdf', '') for k in kp ]; print('\n'.join(l))" > kp.txt

comm <(cat temp.txt | sort) <(cat kp.txt | sort) | cut -f1 | grep "^[0-9]" > $1

Expand Down

0 comments on commit 0bbfc1b

Please sign in to comment.