charlee · August 9, 2023 05:59 · charlee · Aug 9, 2023
diff --git a/A4Part1.py b/A4Part1.py
 import numpy as np
 from scipy.signal import get_window
 from scipy.fftpack import fft, fftshift
 import math
 import matplotlib.pyplot as plt
 eps = np.finfo(float).eps

 """ 
 A4-Part-1: Extracting the main lobe of the spectrum of a window

 Write a function that extracts the main lobe of the magnitude spectrum of a window given a window 
 type and its length (M). The function should return the samples corresponding to the main lobe in 
 decibels (dB).

 To compute the spectrum, take the FFT size (N) to be 8 times the window length (N = 8*M) (For this 
 part, N need not be a power of 2). 

 The input arguments to the function are the window type (window) and the length of the window (M). 
 The function should return a numpy array containing the samples corresponding to the main lobe of 
 the window. 

 In the returned numpy array you should include the samples corresponding to both the local minimas
 across the main lobe. 

 The possible window types that you can expect as input are rectangular ('boxcar'), 'hamming' or
 'blackmanharris'.

 NOTE: You can approach this question in two ways: 1) You can write code to find the indices of the 
 local minimas across the main lobe. 2) You can manually note down the indices of these local minimas 
 by plotting and a visual inspection of the spectrum of the window. If done manually, the indices 
 have to be obtained for each possible window types separately (as they differ across different 
 window types).

 Tip: log10(0) is not well defined, so its a common practice to add a small value such as eps = 1e-16 
 to the magnitude spectrum before computing it in dB. This is optional and will not affect your answers. 
 If you find it difficult to concatenate the two halves of the main lobe, you can first center the 
 spectrum using fftshift() and then compute the indexes of the minimas around the main lobe.


 Test case 1: If you run your code using window = 'blackmanharris' and M = 100, the output numpy 
 array should contain 65 samples.

 Test case 2: If you run your code using window = 'boxcar' and M = 120, the output numpy array 
 should contain 17 samples.

 Test case 3: If you run your code using window = 'hamming' and M = 256, the output numpy array 
 should contain 33 samples.

 """
 def extractMainLobe(window, M):
    """
    Input:
            window (string): Window type to be used (Either rectangular ('boxcar'), 'hamming' or '
                blackmanharris')
            M (integer): length of the window to be used
    Output:
            The function should return a numpy array containing the main lobe of the magnitude 
            spectrum of the window in decibels (dB).
    """

    w = get_window(window, M)         # get the window 
    
    ### Your code here
    N = 8*M
    fftbuffer = np.zeros(N)

    hM1 = (M + 1) // 2
    hM2 = M // 2

    fftbuffer[:hM1] = w[hM2:]
    fftbuffer[-hM2:] = w[:hM1]

    X = fft(fftbuffer)
    X = abs(X)
    X = fftshift(X)

    center = N // 2
    hb = center
    while X[hb] >= X[hb + 1]:
        hb += 1

    lb = center
    while X[lb] >= X[lb - 1]:
        lb -= 1

    X = X[lb:hb+1]

    X = 20 * np.log10(abs(X) + eps)

    return X



 if __name__ == '__main__':
    import loadTestCases
    import matplotlib.pyplot as plt

    nCases = 3

    for i in range(nCases):

        testcase = loadTestCases.load(1, i + 1)

        samples = extractMainLobe(**testcase['input'])

        w = get_window(testcase['input']['window'], testcase['input']['M'])

        plt.subplot(3, nCases, i + nCases * 0 + 1)
        plt.plot(w)
        plt.title(f'Input ({testcase["input"]["window"]})')

        plt.subplot(3, nCases, i + nCases * 1 + 1)
        plt.plot(samples)
        plt.title('Output samples')

        plt.subplot(3, nCases, i + nCases * 2 + 1)
        plt.plot(testcase['output'])
        plt.title('Expected output samples')

    plt.show()
diff --git a/A4Part2.py b/A4Part2.py
 import os
 import sys
 import numpy as np
 import math
 from scipy.signal import get_window
 import matplotlib.pyplot as plt

 sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../software/models/'))
 import stft
 import utilFunctions as UF
 eps = np.finfo(float).eps


 """
 A4-Part-2: Measuring noise in the reconstructed signal using the STFT model 

 Write a function that measures the amount of noise introduced during the analysis and synthesis of a 
 signal using the STFT model. Use SNR (signal to noise ratio) in dB to quantify the amount of noise. 
 Use the stft() function in stft.py to do an analysis followed by a synthesis of the input signal.

 A brief description of the SNR computation can be found in the pdf document (A4-STFT.pdf, in Relevant 
 Concepts section) in the assignment directory (A4). Use the time domain energy definition to compute
 the SNR.

 With the input signal and the obtained output, compute two different SNR values for the following cases:

 1) SNR1: Over the entire length of the input and the output signals.
 2) SNR2: For the segment of the signals left after discarding M samples from both the start and the 
 end, where M is the analysis window length. Note that this computation is done after STFT analysis 
 and synthesis.

 The input arguments to the function are the wav file name including the path (inputFile), window 
 type (window), window length (M), FFT size (N), and hop size (H). The function should return a python 
 tuple of both the SNR values in decibels: (SNR1, SNR2). Both SNR1 and SNR2 are float values. 

 Test case 1: If you run your code using piano.wav file with 'blackman' window, M = 513, N = 2048 and 
 H = 128, the output SNR values should be around: (67.57748352378475, 304.68394693221649).

 Test case 2: If you run your code using sax-phrase-short.wav file with 'hamming' window, M = 512, 
 N = 1024 and H = 64, the output SNR values should be around: (89.510506656299285, 306.18696700251388).

 Test case 3: If you run your code using rain.wav file with 'hann' window, M = 1024, N = 2048 and 
 H = 128, the output SNR values should be around: (74.631476225366825, 304.26918192997738).

 Due to precision differences on different machines/hardware, compared to the expected SNR values, your 
 output values can differ by +/-10dB for SNR1 and +/-100dB for SNR2.
 """

 def computeSNR(inputFile, window, M, N, H):
    """
    Input:
            inputFile (string): wav file name including the path 
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming, 
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a python tuple of both the SNR values (SNR1, SNR2)
            SNR1 and SNR2 are floats.
    """
    ## your code here
    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M, False)
    y = stft.stft(x, w, N, H)

    noise = y - x

    SNR1 = 10 * np.log10(np.sum(x**2) / np.sum(noise**2))

    trimmed_x = x[M:-M]
    trimmed_noise = noise[M:-M]

 #    print(M)
 #    print(x.shape)
 #    print(trimmed_x.shape)
 #
 #    print(f'E x: {np.sum(x**2)}')
 #    print(f'E trimmed x: {np.sum(trimmed_x**2)}')
 #
 #    print(f'E noise: {np.sum(noise**2)}')
 #    print(f'E trimmed noise: {np.sum(trimmed_noise**2)}')
 #
 #    plt.subplot(3, 2, 1)
 #    plt.plot(x[:1000])
 #    plt.subplot(3, 2, 3)
 #    plt.plot(y[:1000])
 #    plt.subplot(3, 2, 5)
 #    plt.plot(noise[:1000])
 #
 #    plt.subplot(3, 2, 2)
 #    plt.plot(trimmed_x[:1000])
 #    plt.subplot(3, 2, 4)
 #    plt.plot(trimmed_y[:1000])
 #    plt.subplot(3, 2, 6)
 #    plt.plot(trimmed_noise[:1000])
 #    plt.show()
 #
 #
 #    exit(1)
 #

    SNR2 = 10 * np.log10(np.sum(trimmed_x**2) / np.sum(trimmed_noise**2))

    return (SNR1, SNR2)



 if __name__ == '__main__':
    import loadTestCases
    import matplotlib.pyplot as plt

    nCases = 3

    for i in range(nCases):

        testcase = loadTestCases.load(2, i + 1)

        (SNR1, SNR2) = computeSNR(**testcase['input'])

        print(f'Test case {i + 1}')
        print(f'inputFile: {testcase["input"]}')
        print(f'Expected: {testcase["output"]}')
        print(f'Actual: {(SNR1, SNR2)}')
diff --git a/A4Part3.py b/A4Part3.py
 import os
 import sys
 import numpy as np
 from scipy.signal import get_window
 import matplotlib.pyplot as plt

 sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../software/models/'))
 import stft
 import utilFunctions as UF

 eps = np.finfo(float).eps

 """
 A4-Part-3: Computing band-wise energy envelopes of a signal

 Write a function that computes band-wise energy envelopes of a given audio signal by using the STFT.
 Consider two frequency bands for this question, low and high. The low frequency band is the set of 
 all the frequencies between 0 and 3000 Hz and the high frequency band is the set of all the 
 frequencies between 3000 and 10000 Hz (excluding the boundary frequencies in both the cases). 
 At a given frame, the value of the energy envelope of a band can be computed as the sum of squared 
 values of all the frequency coefficients in that band. Compute the energy envelopes in decibels. 

 Refer to "A4-STFT.pdf" document for further details on computing bandwise energy.

 The input arguments to the function are the wav file name including the path (inputFile), window 
 type (window), window length (M), FFT size (N) and hop size (H). The function should return a numpy 
 array with two columns, where the first column is the energy envelope of the low frequency band and 
 the second column is that of the high frequency band.

 Use stft.stftAnal() to obtain the STFT magnitude spectrum for all the audio frames. Then compute two 
 energy values for each frequency band specified. While calculating frequency bins for each frequency 
 band, consider only the bins that are within the specified frequency range. For example, for the low 
 frequency band consider only the bins with frequency > 0 Hz and < 3000 Hz (you can use np.where() to 
 find those bin indexes). This way we also remove the DC offset in the signal in energy envelope 
 computation. The frequency corresponding to the bin index k can be computed as k*fs/N, where fs is 
 the sampling rate of the signal.

 To get a better understanding of the energy envelope and its characteristics you can plot the envelopes 
 together with the spectrogram of the signal. You can use matplotlib plotting library for this purpose. 
 To visualize the spectrogram of a signal, a good option is to use colormesh. You can reuse the code in
 sms-tools/lectures/4-STFT/plots-code/spectrogram.py. Either overlay the envelopes on the spectrogram 
 or plot them in a different subplot. Make sure you use the same range of the x-axis for both the 
 spectrogram and the energy envelopes.

 NOTE: Running these test cases might take a few seconds depending on your hardware.

 Test case 1: Use piano.wav file with window = 'blackman', M = 513, N = 1024 and H = 128 as input. 
 The bin indexes of the low frequency band span from 1 to 69 (69 samples) and of the high frequency 
 band span from 70 to 232 (163 samples). To numerically compare your output, use loadTestCases.py
 script to obtain the expected output.

 Test case 2: Use piano.wav file with window = 'blackman', M = 2047, N = 4096 and H = 128 as input. 
 The bin indexes of the low frequency band span from 1 to 278 (278 samples) and of the high frequency 
 band span from 279 to 928 (650 samples). To numerically compare your output, use loadTestCases.py
 script to obtain the expected output.

 Test case 3: Use sax-phrase-short.wav file with window = 'hamming', M = 513, N = 2048 and H = 256 as 
 input. The bin indexes of the low frequency band span from 1 to 139 (139 samples) and of the high 
 frequency band span from 140 to 464 (325 samples). To numerically compare your output, use 
 loadTestCases.py script to obtain the expected output.

 In addition to comparing results with the expected output, you can also plot your output for these 
 test cases.You can clearly notice the sharp attacks and decay of the piano notes for test case 1 
 (See figure in the accompanying pdf). You can compare this with the output from test case 2 that 
 uses a larger window. You can infer the influence of window size on sharpness of the note attacks 
 and discuss it on the forums.
 """
 def computeEngEnv(inputFile, window, M, N, H):
    """
    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, 
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a numpy array engEnv with shape Kx2, K = Number of frames
            containing energy envelop of the signal in decibles (dB) scale
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
    """
    
    ### your code here
    (fs, x) = UF.wavread(inputFile)

    w = get_window(window, M, False)

    mX, pX = stft.stftAnal(x, w, N, H)

    mX = 10 ** (mX / 20)

    # Get the frequency bins
    freq_per_bin = fs / N
    lfb = np.ceil(3000 / freq_per_bin)
    hfb = np.ceil(10000 / freq_per_bin)
    low_freq_bins = np.arange(1, lfb, dtype=int)
    high_freq_bins = np.arange(lfb, hfb, dtype=int)

    #print(freq_per_bin)
    #print(lfb, hfb)
    #print(low_freq_bins)
    #print(low_freq_bins * freq_per_bin)
    #print(high_freq_bins)
    #print(high_freq_bins * freq_per_bin)

    low_band = mX[:, low_freq_bins]
    high_band = mX[:, high_freq_bins]


    # Compute the energy envelope
    low_band_env = 10 * np.log10(np.sum(np.abs(low_band) ** 2, axis=1))
    high_band_env = 10 * np.log10(np.sum(np.abs(high_band) ** 2, axis=1))

    engEnv = (np.vstack((low_band_env, high_band_env))).T

    return engEnv


    
 if __name__ == '__main__':
    import loadTestCases
    import matplotlib.pyplot as plt

    nCases = 3

    f, axs = plt.subplots(3, nCases, figsize=(16, 8))
    f.tight_layout(h_pad=3, w_pad=3)

    for i in range(nCases):

        testcase = loadTestCases.load(3, i + 1)

        engEnv = computeEngEnv(**testcase['input'])

        print(f'Test case {i+1}:')
        print(f'Actual output: {engEnv}')
        print(f'Expected output: {testcase["output"]}')

        axs[0][i].plot(engEnv[:, 0], label='Low Band')
        axs[0][i].plot(engEnv[:, 1], label='High Band')
        axs[0][i].set_xlabel(f'Frame number')
        axs[0][i].set_ylabel(f'Energy (dB)')
        axs[0][i].set_title(f'Test case {i+1}: Actual')
        axs[0][i].legend()

        axs[1][i].plot(testcase['output'][:, 0], label='Low Band')
        axs[1][i].plot(testcase['output'][:, 1], label='High Band')
        axs[1][i].set_xlabel(f'Frame number')
        axs[1][i].set_ylabel(f'Energy (dB)')
        axs[1][i].set_title(f'Test case {i+1}: Expected')
        axs[1][i].legend()

        axs[2][i].plot(engEnv[:, 0] - testcase['output'][:, 0], label='Low Band')
        axs[2][i].plot(engEnv[:, 1] - testcase['output'][:, 1], label='High Band')
        axs[2][i].set_xlabel(f'Frame number')
        axs[2][i].set_ylabel(f'Energy (dB)')
        axs[2][i].set_title(f'Error')
        axs[2][i].legend()

    plt.show()
	import numpy as np
	from scipy.signal import get_window
	from scipy.fftpack import fft, fftshift
	import math
	import matplotlib.pyplot as plt
	eps = np.finfo(float).eps

	"""
	A4-Part-1: Extracting the main lobe of the spectrum of a window

	Write a function that extracts the main lobe of the magnitude spectrum of a window given a window
	type and its length (M). The function should return the samples corresponding to the main lobe in
	decibels (dB).

	To compute the spectrum, take the FFT size (N) to be 8 times the window length (N = 8*M) (For this
	part, N need not be a power of 2).

	The input arguments to the function are the window type (window) and the length of the window (M).
	The function should return a numpy array containing the samples corresponding to the main lobe of
	the window.

	In the returned numpy array you should include the samples corresponding to both the local minimas
	across the main lobe.

	The possible window types that you can expect as input are rectangular ('boxcar'), 'hamming' or
	'blackmanharris'.

	NOTE: You can approach this question in two ways: 1) You can write code to find the indices of the
	local minimas across the main lobe. 2) You can manually note down the indices of these local minimas
	by plotting and a visual inspection of the spectrum of the window. If done manually, the indices
	have to be obtained for each possible window types separately (as they differ across different
	window types).

	Tip: log10(0) is not well defined, so its a common practice to add a small value such as eps = 1e-16
	to the magnitude spectrum before computing it in dB. This is optional and will not affect your answers.
	If you find it difficult to concatenate the two halves of the main lobe, you can first center the
	spectrum using fftshift() and then compute the indexes of the minimas around the main lobe.


	Test case 1: If you run your code using window = 'blackmanharris' and M = 100, the output numpy
	array should contain 65 samples.

	Test case 2: If you run your code using window = 'boxcar' and M = 120, the output numpy array
	should contain 17 samples.

	Test case 3: If you run your code using window = 'hamming' and M = 256, the output numpy array
	should contain 33 samples.

	"""
	def extractMainLobe(window, M):
	"""
	Input:
	window (string): Window type to be used (Either rectangular ('boxcar'), 'hamming' or '
	blackmanharris')
	M (integer): length of the window to be used
	Output:
	The function should return a numpy array containing the main lobe of the magnitude
	spectrum of the window in decibels (dB).
	"""

	w = get_window(window, M) # get the window

	### Your code here
	N = 8*M
	fftbuffer = np.zeros(N)

	hM1 = (M + 1) // 2
	hM2 = M // 2

	fftbuffer[:hM1] = w[hM2:]
	fftbuffer[-hM2:] = w[:hM1]

	X = fft(fftbuffer)
	X = abs(X)
	X = fftshift(X)

	center = N // 2
	hb = center
	while X[hb] >= X[hb + 1]:
	hb += 1

	lb = center
	while X[lb] >= X[lb - 1]:
	lb -= 1

	X = X[lb:hb+1]

	X = 20 * np.log10(abs(X) + eps)

	return X



	if __name__ == '__main__':
	import loadTestCases
	import matplotlib.pyplot as plt

	nCases = 3

	for i in range(nCases):

	testcase = loadTestCases.load(1, i + 1)

	samples = extractMainLobe(**testcase['input'])

	w = get_window(testcase['input']['window'], testcase['input']['M'])

	plt.subplot(3, nCases, i + nCases * 0 + 1)
	plt.plot(w)
	plt.title(f'Input ({testcase["input"]["window"]})')

	plt.subplot(3, nCases, i + nCases * 1 + 1)
	plt.plot(samples)
	plt.title('Output samples')

	plt.subplot(3, nCases, i + nCases * 2 + 1)
	plt.plot(testcase['output'])
	plt.title('Expected output samples')

	plt.show()
	import os
	import sys
	import numpy as np
	import math
	from scipy.signal import get_window
	import matplotlib.pyplot as plt

	sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../software/models/'))
	import stft
	import utilFunctions as UF
	eps = np.finfo(float).eps


	"""
	A4-Part-2: Measuring noise in the reconstructed signal using the STFT model

	Write a function that measures the amount of noise introduced during the analysis and synthesis of a
	signal using the STFT model. Use SNR (signal to noise ratio) in dB to quantify the amount of noise.
	Use the stft() function in stft.py to do an analysis followed by a synthesis of the input signal.

	A brief description of the SNR computation can be found in the pdf document (A4-STFT.pdf, in Relevant
	Concepts section) in the assignment directory (A4). Use the time domain energy definition to compute
	the SNR.

	With the input signal and the obtained output, compute two different SNR values for the following cases:

	1) SNR1: Over the entire length of the input and the output signals.
	2) SNR2: For the segment of the signals left after discarding M samples from both the start and the
	end, where M is the analysis window length. Note that this computation is done after STFT analysis
	and synthesis.

	The input arguments to the function are the wav file name including the path (inputFile), window
	type (window), window length (M), FFT size (N), and hop size (H). The function should return a python
	tuple of both the SNR values in decibels: (SNR1, SNR2). Both SNR1 and SNR2 are float values.

	Test case 1: If you run your code using piano.wav file with 'blackman' window, M = 513, N = 2048 and
	H = 128, the output SNR values should be around: (67.57748352378475, 304.68394693221649).

	Test case 2: If you run your code using sax-phrase-short.wav file with 'hamming' window, M = 512,
	N = 1024 and H = 64, the output SNR values should be around: (89.510506656299285, 306.18696700251388).

	Test case 3: If you run your code using rain.wav file with 'hann' window, M = 1024, N = 2048 and
	H = 128, the output SNR values should be around: (74.631476225366825, 304.26918192997738).

	Due to precision differences on different machines/hardware, compared to the expected SNR values, your
	output values can differ by +/-10dB for SNR1 and +/-100dB for SNR2.
	"""

	def computeSNR(inputFile, window, M, N, H):
	"""
	Input:
	inputFile (string): wav file name including the path
	window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
	blackman, blackmanharris)
	M (integer): analysis window length (odd positive integer)
	N (integer): fft size (power of two, > M)
	H (integer): hop size for the stft computation
	Output:
	The function should return a python tuple of both the SNR values (SNR1, SNR2)
	SNR1 and SNR2 are floats.
	"""
	## your code here
	(fs, x) = UF.wavread(inputFile)
	w = get_window(window, M, False)
	y = stft.stft(x, w, N, H)

	noise = y - x

	SNR1 = 10 * np.log10(np.sum(x2) / np.sum(noise2))

	trimmed_x = x[M:-M]
	trimmed_noise = noise[M:-M]

	# print(M)
	# print(x.shape)
	# print(trimmed_x.shape)
	#
	# print(f'E x: {np.sum(x**2)}')
	# print(f'E trimmed x: {np.sum(trimmed_x**2)}')
	#
	# print(f'E noise: {np.sum(noise**2)}')
	# print(f'E trimmed noise: {np.sum(trimmed_noise**2)}')
	#
	# plt.subplot(3, 2, 1)
	# plt.plot(x[:1000])
	# plt.subplot(3, 2, 3)
	# plt.plot(y[:1000])
	# plt.subplot(3, 2, 5)
	# plt.plot(noise[:1000])
	#
	# plt.subplot(3, 2, 2)
	# plt.plot(trimmed_x[:1000])
	# plt.subplot(3, 2, 4)
	# plt.plot(trimmed_y[:1000])
	# plt.subplot(3, 2, 6)
	# plt.plot(trimmed_noise[:1000])
	# plt.show()
	#
	#
	# exit(1)
	#

	SNR2 = 10 * np.log10(np.sum(trimmed_x2) / np.sum(trimmed_noise2))

	return (SNR1, SNR2)



	if __name__ == '__main__':
	import loadTestCases
	import matplotlib.pyplot as plt

	nCases = 3

	for i in range(nCases):

	testcase = loadTestCases.load(2, i + 1)

	(SNR1, SNR2) = computeSNR(**testcase['input'])

	print(f'Test case {i + 1}')
	print(f'inputFile: {testcase["input"]}')
	print(f'Expected: {testcase["output"]}')
	print(f'Actual: {(SNR1, SNR2)}')