-
Notifications
You must be signed in to change notification settings - Fork 3
/
text_utils.py
47 lines (37 loc) · 1.27 KB
/
text_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import numpy as np
import os
import re
def get_vocab(joined_string):
    """Return the distinct elements of ``joined_string`` as a sorted list.

    Iterating a string yields its characters, so for a string input the
    result is the sorted list of unique characters.
    """
    vocab = list(set(joined_string))
    vocab.sort()
    return vocab
def vectorize_vocab(vocab):
    """Build lookup structures for a vocabulary.

    Returns a tuple ``(char2idx, idx2char)`` where ``char2idx`` maps each
    vocabulary entry to its position in ``vocab`` and ``idx2char`` is a
    NumPy array built from ``vocab``.
    """
    char2idx = {}
    for position, token in enumerate(vocab):
        char2idx[token] = position
    idx2char = np.array(vocab)
    return char2idx, idx2char
def vectorize_string(char2idx, string):
    """Map every element of ``string`` through ``char2idx`` into a NumPy array.

    Raises ``KeyError`` if an element of ``string`` is not a key of
    ``char2idx``.
    """
    indices = []
    for symbol in string:
        indices.append(char2idx[symbol])
    return np.array(indices)
def load_abc_data(name):
    """Read ``TrainingData/<name>`` and return the songs extracted from it.

    The path is relative, so it is resolved against the current working
    directory. The file's full text is handed to ``extract_song_snippet``
    and that function's result is returned unchanged.
    """
    data_path = os.path.join("TrainingData", name)
    with open(data_path, "r") as handle:
        raw_text = handle.read()
    return extract_song_snippet(raw_text)
def extract_song_snippet(text):
    """Split ``text`` into songs delimited by blank lines.

    A song is the text that starts at the beginning of ``text`` or right
    after a blank line (two consecutive newlines) and runs up to the next
    blank line. A trailing chunk that is not followed by a blank line is
    not returned.

    Prints the number of songs found and returns them as a list of strings.
    """
    # The standard-library ``re.findall`` has no ``overlapped`` keyword (that
    # option belongs to the third-party ``regex`` package), so passing it
    # raises TypeError. Overlap is required because the blank line that ends
    # one song is also the one that introduces the next. Wrapping the whole
    # expression in a lookahead makes each match zero-width, so the scan
    # advances one character at a time and every start position is tried.
    pattern = r'(?=(^|\n\n)(.*?)\n\n)'
    search_results = re.findall(pattern, text, flags=re.DOTALL)
    # Each result is a (leading delimiter, song body) tuple; keep the body.
    songs = [song[1] for song in search_results]
    print("Found {} songs in text".format(len(songs)))
    return songs
def save_song_to_abc(song, filename="tmp"):
    """Write ``song`` to ``<filename>.abc`` and return ``filename``.

    The returned value is the base name that was passed in, without the
    ``.abc`` extension.
    """
    with open("{}.abc".format(filename), "w") as out_file:
        out_file.write(song)
    return filename
def clean_abc(in_path, out_path):
    """Copy ``in_path`` to ``out_path``, dropping selected lines.

    Lines that start with ``%``, ``Z`` or ``N`` are skipped. Every other
    line is printed to stdout and written to ``out_path`` unchanged.
    """
    skipped_prefixes = ("%", "Z", "N")
    # Context managers close both files even if reading or writing fails.
    with open(in_path, "r") as source, open(out_path, "w") as target:
        # Iterate the file directly so it is streamed line by line rather
        # than loaded into memory all at once.
        for line in source:
            if not line.startswith(skipped_prefixes):
                print(line)
                target.write(line)