Skip to content

Commit

Permalink
start of doing something
Browse files Browse the repository at this point in the history
  • Loading branch information
justDabuK committed Apr 14, 2018
1 parent 7f038cf commit 0c436ec
Show file tree
Hide file tree
Showing 15 changed files with 4,603 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.idea
*.pyc
Empty file added __init__.py
Empty file.
195 changes: 195 additions & 0 deletions analyzeMovieSound.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
import os, sys, shutil, glob, numpy, csv, cPickle
import scipy.io.wavfile as wavfile
import audioBasicIO
import audioTrainTest as aT
import audioSegmentation as aS
import matplotlib.pyplot as plt
import scipy.spatial.distance
# Minimum length a "Music" segment must have to be exported by
# getMusicSegmentsFromFile (presumably seconds -- TODO confirm the unit of
# the segment limits returned by aS.flags2segs).
minDuration = 7

def classifyFolderWrapper(inputFolder, modelType, modelName, outputMode=False):
    """Classify every WAV file selected by ``inputFolder`` and aggregate the results.

    Each file is classified with ``aT.fileClassification``. The class
    probabilities it returns are weighted by the file length (number of
    samples divided by the sampling rate) and accumulated over all files;
    the accumulated vector is finally normalised so that it sums to one.

    Args:
        inputFolder: directory that is searched for ``*.wav`` files. If it is
            not an existing directory it is used as a plain prefix that is
            put in front of ``*.wav``.
        modelType: ``'svm'`` or ``'knn'``.
        modelName: path of the trained model file.
        outputMode: when true, print the winner class of each file and the
            per-class file counts, and show a bar chart of the aggregated
            class percentages.

    Returns:
        ``(classNames, PsAll)``, or ``None`` (after printing a message) when
        no WAV file matches.

    Raises:
        Exception: if ``modelName`` is not an existing file.
        ValueError: if ``modelType`` is neither ``'svm'`` nor ``'knn'``.
    """
    if not os.path.isfile(modelName):
        raise Exception("Input modelName not found!")

    # Reject unknown model types up front; previously this fell through and
    # crashed later with an unbound ``classNames``.
    if modelType == 'svm':
        loadModel = aT.loadSVModel
    elif modelType == 'knn':
        loadModel = aT.loadKNNModel
    else:
        raise ValueError("Unsupported modelType: {0!r} (expected 'svm' or 'knn')".format(modelType))

    # Only the class names (4th element of the loader's result) are needed
    # here; aT.fileClassification() loads the model itself for every file.
    classNames = loadModel(modelName)[3]

    PsAll = numpy.zeros((len(classNames), ))

    files = "*.wav"
    if os.path.isdir(inputFolder):
        strFilePattern = os.path.join(inputFolder, files)
    else:
        strFilePattern = inputFolder + files

    wavFilesList = sorted(glob.glob(strFilePattern))
    if not wavFilesList:
        print("No WAV files found!")
        return

    Results = []
    for wavFile in wavFilesList:
        [Fs, x] = audioBasicIO.readAudioFile(wavFile)
        signalLength = x.shape[0] / float(Fs)
        [Result, P, classNames] = aT.fileClassification(wavFile, modelName, modelType)
        # Longer files contribute proportionally more to the aggregate.
        PsAll += (numpy.array(P) * signalLength)
        Result = int(Result)
        Results.append(Result)
        if outputMode:
            print("{0:s}\t{1:s}".format(wavFile, classNames[Result]))

    if outputMode:
        # Print the distribution of winner classes over the files.
        [Histogram, _] = numpy.histogram(numpy.array(Results), bins=numpy.arange(len(classNames)+1))
        for i, h in enumerate(Histogram):
            print("{0:20s}\t\t{1:d}".format(classNames[i], h))

    PsAll = PsAll / numpy.sum(PsAll)

    if outputMode:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.title("Classes percentage " + inputFolder.replace('Segments', ''))
        ax.axis((0, len(classNames)+1, 0, 1))
        ax.set_xticks(numpy.array(range(len(classNames)+1)))
        ax.set_xticklabels([" "] + classNames)
        ax.bar(numpy.array(range(len(classNames)))+0.5, PsAll)
        plt.show()
    return classNames, PsAll

def getMusicSegmentsFromFile(inputFile):
    """Export the "Music" segments of ``inputFile`` as separate WAV files.

    The file is segmented with ``aS.mtFileClassification`` using the
    ``data/svmMovies8classes`` SVM model. Every segment labelled "Music"
    that is at least ``minDuration`` long is written to
    ``<inputFile minus its last four characters>_musicSegments/`` as
    ``<start>-<end>.wav``. An existing output folder is deleted first.
    """
    modelType = "svm"
    modelName = "data/svmMovies8classes"

    # Callers rebuild this folder name the same way, so keep the
    # "drop the last four characters" rule as is.
    dirOutput = inputFile[:-4] + "_musicSegments"

    # Always start from an empty output folder.
    if dirOutput != "." and os.path.exists(dirOutput):
        shutil.rmtree(dirOutput)
    os.makedirs(dirOutput)

    [samplingRate, signal] = audioBasicIO.readAudioFile(inputFile)

    if modelType == 'svm':
        modelParams = aT.loadSVModel(modelName)
    elif modelType == 'knn':
        modelParams = aT.loadKNNModel(modelName)
    # Of the nine values stored with the model only the step (mtStep) is
    # needed here, to convert the flags into segment limits.
    [_classifier, _mean, _std, _modelClasses, _mtWin, mtStep,
     _stWin, _stStep, _computeBeat] = modelParams

    flagsInd, classNames, acc, CM = aS.mtFileClassification(
        inputFile, modelName, modelType, plotResults=False, gtFile="")
    segs, classes = aS.flags2segs(flagsInd, mtStep)

    for segIdx, seg in enumerate(segs):
        isMusic = classNames[int(classes[segIdx])] == "Music"
        if not (isMusic and (seg[1] - seg[0] >= minDuration)):
            continue
        outPath = "{0:s}{1:.3f}-{2:.3f}.wav".format(dirOutput + os.sep, seg[0], seg[1])
        firstSample = int(samplingRate * seg[0])
        lastSample = int(samplingRate * seg[1])
        wavfile.write(outPath, samplingRate, signal[firstSample:lastSample])

def analyzeDir(dirPath):
    """Print a tab-separated table of music-class percentages per WAV file.

    For every ``*.wav`` file directly inside ``dirPath`` the music segments
    are extracted with ``getMusicSegmentsFromFile`` and then classified with
    ``classifyFolderWrapper`` (SVM model ``data/svmMusicGenre8``). The first
    printed line is a header with the class names; every following line
    holds the file path and one percentage per class.

    Files for which no segment was exported are skipped (a note goes to
    stderr so that the table on stdout stays clean).
    """
    headerPrinted = False
    # Sorted for a reproducible row order, like the other file listings.
    for f in sorted(glob.glob(dirPath + os.sep + '*.wav')):
        getMusicSegmentsFromFile(f)
        result = classifyFolderWrapper(f[0:-4] + "_musicSegments", "svm", "data/svmMusicGenre8", False)
        if result is None:
            # classifyFolderWrapper returns None when the segment folder
            # holds no WAV file; unpacking it used to raise TypeError.
            sys.stderr.write("Skipping {0}: no music segments to classify\n".format(f))
            continue
        c, P = result
        if not headerPrinted:
            # Tracked with a flag rather than the loop index so the header
            # is still printed when the first file had to be skipped.
            print("".ljust(100) + "\t" + "".join(C.ljust(12) + "\t" for C in c))
            headerPrinted = True
        print(f.ljust(100) + "\t" + "".join("{0:.2f}".format(p).ljust(12) + "\t" for p in P))

def _printUsage(argv):
    """Write the supported command lines to stderr and return exit status 1."""
    prog = argv[0] if argv else "analyzeMovieSound.py"
    sys.stderr.write(
        "usage: {0} --file <wavFile>\n"
        "       {0} --dir <folder>\n"
        "       {0} --sim <featureTable>\n"
        "       {0} --loadsim <simMatrixFile>\n"
        "       {0} --audio-event-dir <folder>\n".format(prog))
    return 1


def _formatTableRow(label, cells):
    """Return one table line: a 100-column label plus tab-terminated 12-column cells."""
    return label.ljust(100) + "\t" + "".join(cell.ljust(12) + "\t" for cell in cells)


def _computeSimilarity(csvFile):
    """Build, plot and pickle the pairwise cosine-distance matrix of a feature table."""
    featureRows = []
    fileNames = []
    # 'rb' is the mode the Python 2 csv module expects.
    with open(csvFile, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter='\t', quotechar='|')
        for j, row in enumerate(reader):
            if j == 0 or not row:
                # Skip the header line and blank lines.
                continue
            # Columns 1..8 hold the feature values of the row.
            featureRows.append([float(row[i]) for i in range(1, 9)])
            name = row[0]
            extPos = name.find(".wav")
            # Cut at ".wav" only when present: find() returns -1 otherwise
            # and slicing with it would silently drop the last character.
            fileNames.append(name[:extPos] if extPos >= 0 else name)

    if not featureRows:
        sys.stderr.write("No feature rows found in {0}\n".format(csvFile))
        return 1

    f = numpy.array(featureRows)
    # One vectorised call instead of a Python double loop over row pairs.
    # NOTE: despite its name, Sim holds cosine *distances*.
    Sim = scipy.spatial.distance.cdist(f, f, 'cosine')

    Sim1 = numpy.reshape(Sim, (Sim.shape[0]*Sim.shape[1], 1))
    plt.hist(Sim1)
    plt.show()

    with open(csvFile + "_simMatrix", "wb") as fo:
        for obj in (fileNames, f, Sim):
            cPickle.dump(obj, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    return 0


def _showSavedSimilarity(path):
    """Load a file written by ``--sim``, print its file names and plot the histogram."""
    try:
        fo = open(path, "rb")
    except IOError:
        print("didn't find file")
        return None
    # NOTE(security): unpickling can execute arbitrary code; only load files
    # that were produced by the --sim command of this script.
    try:
        fileNames = cPickle.load(fo)
        cPickle.load(fo)  # feature matrix: not used here, read to reach Sim
        Sim = cPickle.load(fo)
    except Exception as err:
        # Unpickling can fail in many ways; report instead of crashing later
        # on undefined names.
        sys.stderr.write("Could not read {0}: {1}\n".format(path, err))
        return 1
    finally:
        fo.close()
    print(fileNames)
    Sim1 = numpy.reshape(Sim, (Sim.shape[0]*Sim.shape[1], 1))
    plt.hist(Sim1)
    plt.show()
    return 0


def _printAudioEventHistograms(inputFolder):
    """Print, per WAV file, the fraction of classification flags assigned to each class."""
    files = "*.wav"
    if os.path.isdir(inputFolder):
        strFilePattern = os.path.join(inputFolder, files)
    else:
        strFilePattern = inputFolder + files

    for i, w in enumerate(sorted(glob.glob(strFilePattern))):
        [flagsInd, classesAll, acc, CM] = aS.mtFileClassification(w, "data/svmMovies8classes", "svm", False, '')
        histTemp = numpy.zeros((len(classesAll), ))
        for flag in flagsInd:
            histTemp[int(flag)] += 1.0
        total = histTemp.sum()
        if total > 0:
            # Guard against an empty flag sequence (division by zero).
            histTemp /= total

        if i == 0:
            print(_formatTableRow("", classesAll))
        print(_formatTableRow(w, ["{0:.2f}".format(h) for h in histTemp]))
    return 0


def main(argv):
    """Command-line entry point.

    ``argv[1]`` selects the command and ``argv[2]`` is its argument:

    - ``--file <wav>``: extract the music segments of one file and classify
      them with verbose output.
    - ``--dir <folder>``: run ``analyzeDir`` on a folder.
    - ``--sim <table>``: read a tab-separated feature table, compute the
      pairwise cosine distances, show their histogram and pickle the result
      to ``<table>_simMatrix``.
    - ``--loadsim <file>``: load such a pickle file, print the file names and
      show the histogram.
    - ``--audio-event-dir <folder>``: print a table with the per-class share
      of classification flags of every WAV file.

    Returns:
        0 on success, 1 for a usage error or unreadable input, and ``None``
        when the ``--loadsim`` file cannot be opened.
    """
    if len(argv) < 3:
        return _printUsage(argv)
    command, target = argv[1], argv[2]

    if command == "--file":
        getMusicSegmentsFromFile(target)
        classifyFolderWrapper(target[0:-4] + "_musicSegments", "svm", "data/svmMusicGenre8", True)
    elif command == "--dir":
        analyzeDir(target)
    elif command == "--sim":
        return _computeSimilarity(target)
    elif command == "--loadsim":
        return _showSavedSimilarity(target)
    elif command == "--audio-event-dir":
        return _printAudioEventHistograms(target)
    else:
        return _printUsage(argv)

    return 0

# Run the command-line interface only when the file is executed as a script.
if __name__ == "__main__":
    main(sys.argv)
67 changes: 67 additions & 0 deletions audacityAnnotation2WAVs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import glob
import os
import audioBasicIO
import sys
import csv
import scipy.io.wavfile as wavfile


def annotation2files(wavFile, csvFile):
    '''
    Break an audio stream to segments of interest,
    defined by a csv file
     - wavFile:    path to input wavfile
     - csvFile:    path to csvFile of segment limits
    Each line of the CSV file must hold three tab-separated fields:
    start time, end time and label. A decimal comma in the times is
    accepted. Lines with fewer than three fields (e.g. blank lines) are
    skipped.

    Every segment is written next to the input file as
    <wavFile name>_<label>_<start>_<end>.wav, with spaces in that file
    name replaced by underscores.
    '''

    [Fs, x] = audioBasicIO.readAudioFile(wavFile)
    # Only the file name is sanitised below; the directory part must stay
    # untouched or a folder containing spaces would no longer be found.
    wavDir, wavBase = os.path.split(wavFile)
    # 'rb' is the mode the Python 2 csv module expects.
    with open(csvFile, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter='\t', quotechar='|')
        for row in reader:
            if len(row) < 3:
                # Blank or incomplete line: nothing to cut.
                continue
            T1 = float(row[0].replace(",", "."))
            T2 = float(row[1].replace(",", "."))
            outName = "%s_%s_%.2f_%.2f.wav" % (wavBase, row[2], T1, T2)
            label = os.path.join(wavDir, outName.replace(" ", "_"))
            xtemp = x[int(round(T1*Fs)):int(round(T2*Fs))]
            print("{0} {1} {2} {3}".format(T1, T2, label, xtemp.shape))
            wavfile.write(label, Fs, xtemp)

def main(argv):
    '''
    Command-line entry point.
     - "-f <audioFile> <annotationFile>": split one audio file
     - "-d <folder>": split the audio file of every *.txt / *.csv annotation
       file found in <folder>; the audio file is looked up under the same
       path with a .wav (preferred) or .mp3 extension
    With missing or unknown arguments a usage message is written to stderr.
    Always returns None.
    '''
    if len(argv) >= 4 and argv[1] == "-f":
        wavFile = argv[2]
        annotationFile = argv[3]
        annotation2files(wavFile, annotationFile)
    elif len(argv) >= 3 and argv[1] == "-d":
        inputFolder = argv[2]
        types = ('*.txt', '*.csv')
        annotationFilesList = []
        for files in types:
            # Sorted so that the processing order is reproducible.
            annotationFilesList.extend(sorted(glob.glob(os.path.join(inputFolder, files))))
        for anFile in annotationFilesList:
            baseName = os.path.splitext(anFile)[0]
            candidates = (baseName + ext for ext in (".wav", ".mp3"))
            wavFile = next((c for c in candidates if os.path.isfile(c)), None)
            if wavFile is None:
                # Skip instead of aborting: a stray .txt file without audio
                # must not stop the remaining annotation files.
                print("Audio file not found! Skipping " + anFile)
                continue
            annotation2files(wavFile, anFile)
    else:
        prog = argv[0] if argv else "audacityAnnotation2WAVs.py"
        sys.stderr.write(
            "usage: {0} -f <audiofilepath> <annotationfilepath>\n"
            "       {0} -d <annotationfolderpath>\n".format(prog))


# Extracts a series of annotated WAV files from (a) an audio file (mp3 or wav)
# and (b) a segment annotation file, e.g. a "label" file generated in Audacity.
#
# usage 1:
#   python audacityAnnotation2WAVs.py -f <audiofilepath> <annotationfilepath>
#   <annotationfilepath> is a tab-separated file in which every line has the
#   format <startTime>\t<endTime>\t<classLabel>.
#   The result is a series of WAV files named
#   <audiofilepath>_<classLabel>_<startTime>_<endTime>.wav
#
# usage 2:
#   python audacityAnnotation2WAVs.py -d <annotationfolderpath>
#   Same, but for all .txt and .csv annotation files of the folder. Each audio
#   file is expected under the same path / file name with a .wav extension
#   (.mp3 is tried when no .wav exists).
if __name__ == "__main__":
    main(sys.argv)

Loading

0 comments on commit 0c436ec

Please sign in to comment.