[go: nahoru, domu]

Skip to content

Commit

Permalink
add real-time localization
Browse files Browse the repository at this point in the history
  • Loading branch information
seanwood committed Jan 19, 2019
1 parent 2bf235e commit d64e3ff
Show file tree
Hide file tree
Showing 13 changed files with 128 additions and 23 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,6 @@ ENV/
# Eclipse project settings
.project
.pydevproject

# VSCode project settings
.vscode
Binary file modified README_files/realtimeGCCNMFScreenshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/dev_A_1_2_3_4_mix.wav
Binary file not shown.
Binary file added data/dev_B_1_8_9_16_mix.wav
Binary file not shown.
Binary file added data/dev_C_2_7_10_15_mix.wav
Binary file not shown.
Binary file added data/dev_D_13_14_15_16_mix.wav
Binary file not shown.
9 changes: 6 additions & 3 deletions gccNMF/realtime/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@
from gccNMF.realtime.gccNMFPretraining import getDictionariesW

INT_OPTIONS = ['numTDOAs', 'numTDOAHistory', 'numSpectrogramHistory', 'numChannels',
'windowSize', 'hopSize', 'blockSize', 'dictionarySize', 'numHUpdates']
'windowSize', 'hopSize', 'blockSize', 'dictionarySize', 'numHUpdates',
'localizationWindowSize']
FLOAT_OPTIONS = ['gccPHATNLAlpha', 'microphoneSeparationInMetres']
BOOL_OPTIONS = ['gccPHATNLEnabled']
BOOL_OPTIONS = ['gccPHATNLEnabled', 'localizationEnabled']
STRING_OPTIONS = ['dictionaryType', 'audioPath']

def getDefaultConfig():
Expand All @@ -54,7 +55,9 @@ def getDefaultConfig():
'microphoneSeparationInMetres': '0.1',
'targetTDOAEpsilon': '5.0',
'targetTDOABeta': '2.0',
'targetTDOANoiseFloor': '0.0'}
'targetTDOANoiseFloor': '0.0',
'localizationEnabled': 'True',
'localizationWindowSize': '6'}

config['Audio'] = {'numChannels': '2',
'sampleRate': '16000',
Expand Down
72 changes: 62 additions & 10 deletions gccNMF/realtime/gccNMFInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@
BUTTON_WIDTH = 50

class RealtimeGCCNMFInterfaceWindow(QtGui.QMainWindow):
def __init__(self, audioPath, numTDOAs, gccPHATNLAlpha, gccPHATNLEnabled, dictionariesW, dictionarySize, dictionarySizes, dictionaryType, numHUpdates,
gccPHATHistory, inputSpectrogramHistory, outputSpectrogramHistory, coefficientMaskHistories,
def __init__(self, audioPath, numTDOAs, gccPHATNLAlpha, gccPHATNLEnabled, dictionariesW, dictionarySize, dictionarySizes, dictionaryType, numHUpdates, localizationEnabled, localizationWindowSize,
gccPHATHistory, tdoaHistory, inputSpectrogramHistory, outputSpectrogramHistory, coefficientMaskHistories,
togglePlayAudioProcessQueue, togglePlayAudioProcessAck,
togglePlayGCCNMFProcessQueue, togglePlayGCCNMFProcessAck,
tdoaParamsGCCNMFProcessQueue, tdoaParamsGCCNMFProcessAck,
Expand All @@ -63,12 +63,15 @@ def __init__(self, audioPath, numTDOAs, gccPHATNLAlpha, gccPHATNLEnabled, dictio
self.targetTDOAEpsilon = self.numTDOAs / 10.0
self.gccPHATNLAlpha = gccPHATNLAlpha
self.gccPHATNLEnabled = gccPHATNLEnabled
self.localizationEnabled = localizationEnabled
self.localizationWindowSize = localizationWindowSize

self.gccPHATPlotTimer = QtCore.QTimer()
self.gccPHATPlotTimer.timeout.connect(self.updateGCCPHATPlot)

self.gccPHATHistory = gccPHATHistory
self.gccPHATHistorySize = gccPHATHistory.size()
self.tdoaHistory = tdoaHistory
self.inputSpectrogramHistory = inputSpectrogramHistory
self.outputSpectrogramHistory = outputSpectrogramHistory
self.coefficientMaskHistories = coefficientMaskHistories
Expand Down Expand Up @@ -101,6 +104,8 @@ def __init__(self, audioPath, numTDOAs, gccPHATNLAlpha, gccPHATNLEnabled, dictio
self.initVisualizationWidgets()
self.initWindowLayout()

self.localizationStateChanged()

#self.show()
self.showMaximized()

Expand Down Expand Up @@ -211,13 +216,15 @@ def initControlWidgets(self):
self.initMaskFunctionControls()
self.initMaskFunctionPlot()
self.initNMFControls()
self.initLocalizationControls()
self.initUIControls()

controlWidgetsLayout = QtGui.QVBoxLayout()
controlWidgetsLayout.addWidget(self.gccPHATPlotWidget)
controlWidgetsLayout.addLayout(self.maskFunctionControlslayout)
self.addSeparator(controlWidgetsLayout)
controlWidgetsLayout.addLayout(self.nmfControlsLayout)
controlWidgetsLayout.addLayout(self.localizationControlsLayout)
self.addSeparator(controlWidgetsLayout)
controlWidgetsLayout.addWidget(self.uiConrolsWidget)

Expand All @@ -232,19 +239,20 @@ def initMaskFunctionControls(self):
self.maskFunctionControlslayout.addLayout(labelsLayout)
self.maskFunctionControlslayout.addLayout(slidersLayout)
def addSlider(label, changedFunction, minimum, maximum, value):
labelsLayout.addWidget(QtGui.QLabel(label))
labelWidget = QtGui.QLabel(label)
labelsLayout.addWidget(labelWidget)
slider = QtGui.QSlider(QtCore.Qt.Horizontal)
slider.setMinimum(minimum)
slider.setMaximum(maximum)
slider.setValue(value)
slider.sliderReleased.connect(changedFunction)
slidersLayout.addWidget(slider)
return slider
return slider, labelWidget

self.targetModeWindowTDOASlider = addSlider('Center:', self.tdoaRegionChanged, 0, 100, 50)
self.targetModeWindowWidthSlider = addSlider('Width:', self.tdoaRegionChanged, 1, 101, 50)
self.targetModeWindowBetaSlider = addSlider('Shape:', self.tdoaRegionChanged, 0, 100, 50)
self.targetModeWindowNoiseFloorSlider = addSlider('Floor:', self.tdoaRegionChanged, 0, 100, 0)
self.targetModeWindowTDOASlider, self.targetModeWindowTDOALabel = addSlider('Center:', self.tdoaRegionChanged, 0, 100, 50)
self.targetModeWindowWidthSlider, _ = addSlider('Width:', self.tdoaRegionChanged, 1, 101, 50)
self.targetModeWindowBetaSlider, _ = addSlider('Shape:', self.tdoaRegionChanged, 0, 100, 50)
self.targetModeWindowNoiseFloorSlider, _ = addSlider('Floor:', self.tdoaRegionChanged, 0, 100, 0)

def initMaskFunctionPlot(self):
self.gccPHATPlotWidget = self.createGraphicsLayoutWidget(self.backgroundColor, contentMargins=(6, 12, 18, 10))
Expand All @@ -265,7 +273,7 @@ def initMaskFunctionPlot(self):
self.targetWindowFunctionPlot = TargetWindowFunctionPlot(self.targetTDOARegion, self.targetModeWindowTDOASlider, self.targetModeWindowBetaSlider, self.targetModeWindowNoiseFloorSlider, self.targetModeWindowWidthSlider, self.numTDOAs, pen=self.targetWindowFunctionPen)
self.gccPHATPlotItem.addItem(self.targetWindowFunctionPlot)
self.targetWindowFunctionPlot.updateData()

def initNMFControls(self):
self.nmfControlsLayout = QtGui.QHBoxLayout()
self.nmfControlsLayout.addStretch(1)
Expand All @@ -284,6 +292,25 @@ def initNMFControls(self):
self.nmfControlsLayout.addWidget(self.numHUpdatesSpinBox)
self.nmfControlsLayout.addStretch(1)

def initLocalizationControls(self):
    """Build the horizontal row holding the localization controls.

    Creates an 'Enable Localization' check box and a labelled spin box for
    the sliding window size (range 1-128), places them in a QHBoxLayout
    padded with stretches, and stores the layout and widgets on the
    instance. Initial widget values are applied before the signals are
    connected, so building the row does not itself trigger the handlers.
    """
    layout = QtGui.QHBoxLayout()
    self.localizationControlsLayout = layout
    layout.addStretch(3)

    # Check box that switches localization on and off.
    checkBox = QtGui.QCheckBox('Enable Localization')
    self.localizationCheckBox = checkBox
    checkBox.setChecked(self.localizationEnabled)
    checkBox.stateChanged.connect(self.localizationStateChanged)
    layout.addWidget(checkBox)

    layout.addStretch(1)

    # Label for the spin box; kept on the instance under its own attribute.
    sizeLabel = QtGui.QLabel('Sliding Window Size:')
    self.localizationWindowSizeLabel = sizeLabel
    layout.addWidget(sizeLabel)

    # NOTE: the attribute name below is misspelled ('localziaiton'); it is
    # kept as-is because it is the name this widget is stored under.
    spinBox = QtGui.QSpinBox()
    self.localziaitonWindowSizeSpinBox = spinBox
    spinBox.setMinimum(1)
    spinBox.setMaximum(128)
    spinBox.setValue(self.localizationWindowSize)
    spinBox.valueChanged.connect(self.localizationParamsChanged)
    layout.addWidget(spinBox)

    layout.addStretch(3)

def initUIControls(self):
self.uiConrolsWidget = QtGui.QWidget()
buttonBarWidgetLayout = QtGui.QHBoxLayout(spacing=0)
Expand Down Expand Up @@ -326,6 +353,9 @@ def initVisualizationWidgets(self):
gccPHATHistoryViewBox.addItem(self.gccPHATImageItem)
gccPHATHistoryViewBox.setRange(xRange=(0, self.gccPHATHistory.values.shape[1] - 1), yRange=(0, self.gccPHATHistory.values.shape[0] - 1), padding=0)

self.tdoaPlotDataItem = pg.PlotDataItem( pen=pg.mkPen((255, 0, 0, 255), width=4) )
gccPHATHistoryViewBox.addItem(self.tdoaPlotDataItem)

dictionarySize = self.dictionarySizes[self.dictionarySizeDropDown.currentIndex()]
self.coefficientMaskWidget = self.createGraphicsLayoutWidget(self.backgroundColor)
self.coefficientMaskViewBox = self.coefficientMaskWidget.addViewBox()
Expand Down Expand Up @@ -359,15 +389,21 @@ def updateGCCPHATPlot(self):
self.gccPHATPlot.setData(y=gccPHATValues)
if self.rollingImages:
self.gccPHATImageItem.setImage(-self.gccPHATHistory.getUnraveledArray().T)
self.tdoaPlotDataItem.setData(y=self.tdoaHistory.getUnraveledArray()[0])
self.inputSpectrogramHistoryImageItem.setImage(self.inputSpectrogramHistory.getUnraveledArray().T)
self.outputSpectrogramHistoryImageItem.setImage(self.outputSpectrogramHistory.getUnraveledArray().T)
self.coefficientMaskHistoryImageItem.setImage(self.coefficientMaskHistory.getUnraveledArray().T, levels=[0, 1])
else:
self.gccPHATImageItem.setImage(-self.gccPHATHistory.values.T)
self.tdoaPlotDataItem.setData(y=self.tdoaHistory.values[0])
self.inputSpectrogramHistoryImageItem.setImage(self.inputSpectrogramHistory.values.T)
self.outputSpectrogramHistoryImageItem.setImage(self.outputSpectrogramHistory.values.T)
self.coefficientMaskHistoryImageItem.setImage(self.coefficientMaskHistory.values.T, levels=[0, 1])

if self.localizationCheckBox.isChecked():
sliderValue = self.tdoaHistory.get()[0] / (self.numTDOAs-1) * 100
self.targetModeWindowTDOASlider.setValue(sliderValue)

def toggleInfoViews(self):
isHidden = self.infoLabelWidgets[0].isHidden()
for view in self.infoLabelWidgets:
Expand Down Expand Up @@ -439,7 +475,14 @@ def tdoaRegionChanged(self):
'targetTDOANoiseFloor': self.targetWindowFunctionPlot.getNoiseFloor()},
'gccNMFProcessTDOAParameters (region)')
self.targetWindowFunctionPlot.updateData()


def localizationParamsChanged(self):
    """Queue the current localization settings.

    Reads the check box state and the spin box value (coerced to int) and
    passes them, via queueParams, to tdoaParamsGCCNMFProcessQueue together
    with its acknowledgement object.
    """
    enabled = self.localizationCheckBox.isChecked()
    windowSize = int(self.localziaitonWindowSizeSpinBox.value())
    localizationParams = {
        'localizationEnabled': enabled,
        'localizationWindowSize': windowSize,
    }
    self.queueParams(
        self.tdoaParamsGCCNMFProcessQueue,
        self.tdoaParamsGCCNMFProcessAck,
        localizationParams,
        'gccNMFProcessTDOAParameters (localization)',
    )

def dictionarySizeChanged(self, changeGCCNMFProcessor=True):
self.dictionarySize = self.dictionarySizes[self.dictionarySizeDropDown.currentIndex()]
logging.info('GCCNMFInterface: setting dictionarySize: %d' % self.dictionarySize)
Expand Down Expand Up @@ -468,6 +511,15 @@ def dictionaryTypeChanged(self):
{'dictionaryType': dictionaryType},
'gccNMFProcessTogglePlayParameters')

def localizationStateChanged(self):
    """Sync widget enabled-states with the localization check box.

    When the box is checked, the TDOA slider and its label are disabled
    and the window-size spin box and its label are enabled; when it is
    unchecked, the reverse. The localization parameters are then re-sent
    through localizationParamsChanged.
    """
    localizing = self.localizationCheckBox.isChecked()

    # These two are enabled only while localization is off.
    for manualWidget in (self.targetModeWindowTDOASlider, self.targetModeWindowTDOALabel):
        manualWidget.setEnabled(not localizing)

    # These two are enabled only while localization is on.
    for windowWidget in (self.localziaitonWindowSizeSpinBox, self.localizationWindowSizeLabel):
        windowWidget.setEnabled(localizing)

    self.localizationParamsChanged()

def queueParams(self, queue, ack, params, label='params'):
ack.clear()
logging.debug('GCCNMFInterface: putting %s' % label)
Expand Down
25 changes: 21 additions & 4 deletions gccNMF/realtime/gccNMFProcessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@
TARGET_MODE_WINDOW_FUNCTION = 2

class GCCNMFProcess(Process):
def __init__(self, oladProcessor, sampleRate, windowSize, numTimePerChunk, dictionariesW, dictionaryType, dictionarySize, numHUpdates, microphoneSeparationInMetres,
gccPHATHistory, inputSpectrogramHistory, outputSpectrogramHistory, coefficientMaskHistories,
def __init__(self, oladProcessor, sampleRate, windowSize, numTimePerChunk, dictionariesW, dictionaryType, dictionarySize, numHUpdates, microphoneSeparationInMetres, localizationEnabled, localizationWindowSize,
gccPHATHistory, tdoaHistory, inputSpectrogramHistory, outputSpectrogramHistory, coefficientMaskHistories,
tdoaParametersQueue, tdoaParametersAck, togglePlayQueue, togglePlayAck, toggleSeparationQueue, toggleSeparationAck,
processFramesEvent, processFramesDoneEvent, terminateEvent):
super(GCCNMFProcess, self).__init__()

self.oladProcessor = oladProcessor
self.gccNMFProcessor = GCCNMFProcessor(sampleRate, windowSize, numTimePerChunk, dictionariesW, dictionaryType, dictionarySize, numHUpdates, microphoneSeparationInMetres,
gccPHATHistory, inputSpectrogramHistory, outputSpectrogramHistory, coefficientMaskHistories)
localizationEnabled, localizationWindowSize, gccPHATHistory, tdoaHistory, inputSpectrogramHistory, outputSpectrogramHistory, coefficientMaskHistories)

self.tdoaParametersQueue = tdoaParametersQueue
self.tdoaParametersAck = tdoaParametersAck
Expand Down Expand Up @@ -109,6 +109,12 @@ def processTDOAParametersQueue(self):
targetTDOAIndexes = parameters['targetTDOAIndexes']
logging.info( 'GCCNMFProcessor: setting targetTDOAIndexes: %s' % str(targetTDOAIndexes) )
self.gccNMFProcessor.setTargetTDOAIndexes(targetTDOAIndexes)
elif 'localizationEnabled' in parameters:
localizationEnabled = parameters['localizationEnabled']
localizationWindowSize = parameters['localizationWindowSize']
logging.info( 'GCCNMFProcessor: setting localizationEnabled: %s, localizationWindowSize %s' % (str(localizationEnabled), str(localizationWindowSize)) )
self.gccNMFProcessor.localizationEnabled = localizationEnabled
self.gccNMFProcessor.localizationWindowSize = localizationWindowSize
else:
targetTDOAIndex = parameters['targetTDOAIndex']
targetTDOAEpsilon = parameters['targetTDOAEpsilon']
Expand Down Expand Up @@ -160,7 +166,7 @@ def processToggleSeparationQueue(self):

class GCCNMFProcessor(object):
def __init__(self, sampleRate, windowSize, numTimePerChunk, dictionariesW, dictionaryType, dictionarySize, numHUpdates, microphoneSeparationInMetres,
gccPHATHistory=None, inputSpectrogramHistory=None, outputSpectrogramHistory=None, coefficientMaskHistories=None):
localizationEnabled, localizationWindowSize, gccPHATHistory=None, tdoaHistory=None, inputSpectrogramHistory=None, outputSpectrogramHistory=None, coefficientMaskHistories=None):
super(GCCNMFProcessor, self).__init__()

self.sampleRate = sampleRate
Expand All @@ -172,6 +178,7 @@ def __init__(self, sampleRate, windowSize, numTimePerChunk, dictionariesW, dicti
self.microphoneSeparationInMetres = microphoneSeparationInMetres

self.gccPHATHistory = gccPHATHistory
self.tdoaHistory = tdoaHistory
self.inputSpectrogramHistory = inputSpectrogramHistory
self.outputSpectrogramHistory = outputSpectrogramHistory
self.coefficientMaskHistories = coefficientMaskHistories
Expand All @@ -181,6 +188,8 @@ def __init__(self, sampleRate, windowSize, numTimePerChunk, dictionariesW, dicti

self.numTDOAs = None
self.separationEnabled = True
self.localizationEnabled = localizationEnabled
self.localizationWindowSize = localizationWindowSize
self.targetMode = TARGET_MODE_WINDOW_FUNCTION

from theano import shared
Expand Down Expand Up @@ -208,6 +217,14 @@ def processFrames(self, windowedSamples):
self.inputSpectrogramHistory.set( -np.mean(np.abs(self.complexMixtureSpectrogram), axis=0) ** (1/3.0) )
if self.gccPHATHistory:
self.gccPHATHistory.set( np.nanmean(realGCC, axis=0).T )
if self.tdoaHistory:
if self.localizationEnabled:
gccPHATHistory = self.gccPHATHistory.getUnraveledArray()
tdoaIndex = np.argmax( np.nanmean(gccPHATHistory[:, -self.localizationWindowSize:], axis=-1) )
#tdoaIndex = (self.targetTDOAIndex.get_value() + 1) % self.numTDOAs
#tdoaIndex = np.random.randint(0, self.numTDOAs+1)
self.targetTDOAIndex.set_value(tdoaIndex)
self.tdoaHistory.set( np.array( [[self.targetTDOAIndex.get_value()]] ) )
if self.outputSpectrogramHistory:
self.outputSpectrogramHistory.set( -np.nanmean(np.abs(outputSpectrogram), axis=0) ** (1/3.0) )

Expand Down
Loading

0 comments on commit d64e3ff

Please sign in to comment.