Created
August 9, 2023 05:59
-
-
Save charlee/9849195d6efa25284934ab22679c0d5a to your computer and use it in GitHub Desktop.
A4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from scipy.signal import get_window | |
from scipy.fftpack import fft, fftshift | |
import math | |
import matplotlib.pyplot as plt | |
eps = np.finfo(float).eps | |
""" | |
A4-Part-1: Extracting the main lobe of the spectrum of a window | |
Write a function that extracts the main lobe of the magnitude spectrum of a window given a window | |
type and its length (M). The function should return the samples corresponding to the main lobe in | |
decibels (dB). | |
To compute the spectrum, take the FFT size (N) to be 8 times the window length (N = 8*M) (For this | |
part, N need not be a power of 2). | |
The input arguments to the function are the window type (window) and the length of the window (M). | |
The function should return a numpy array containing the samples corresponding to the main lobe of | |
the window. | |
In the returned numpy array you should include the samples corresponding to both the local minimas | |
across the main lobe. | |
The possible window types that you can expect as input are rectangular ('boxcar'), 'hamming' or | |
'blackmanharris'. | |
NOTE: You can approach this question in two ways: 1) You can write code to find the indices of the | |
local minimas across the main lobe. 2) You can manually note down the indices of these local minimas | |
by plotting and a visual inspection of the spectrum of the window. If done manually, the indices | |
have to be obtained for each possible window types separately (as they differ across different | |
window types). | |
Tip: log10(0) is not well defined, so its a common practice to add a small value such as eps = 1e-16 | |
to the magnitude spectrum before computing it in dB. This is optional and will not affect your answers. | |
If you find it difficult to concatenate the two halves of the main lobe, you can first center the | |
spectrum using fftshift() and then compute the indexes of the minimas around the main lobe. | |
Test case 1: If you run your code using window = 'blackmanharris' and M = 100, the output numpy | |
array should contain 65 samples. | |
Test case 2: If you run your code using window = 'boxcar' and M = 120, the output numpy array | |
should contain 17 samples. | |
Test case 3: If you run your code using window = 'hamming' and M = 256, the output numpy array | |
should contain 33 samples. | |
""" | |
def extractMainLobe(window, M): | |
""" | |
Input: | |
window (string): Window type to be used (Either rectangular ('boxcar'), 'hamming' or ' | |
blackmanharris') | |
M (integer): length of the window to be used | |
Output: | |
The function should return a numpy array containing the main lobe of the magnitude | |
spectrum of the window in decibels (dB). | |
""" | |
w = get_window(window, M) # get the window | |
### Your code here | |
N = 8*M | |
fftbuffer = np.zeros(N) | |
hM1 = (M + 1) // 2 | |
hM2 = M // 2 | |
fftbuffer[:hM1] = w[hM2:] | |
fftbuffer[-hM2:] = w[:hM1] | |
X = fft(fftbuffer) | |
X = abs(X) | |
X = fftshift(X) | |
center = N // 2 | |
hb = center | |
while X[hb] >= X[hb + 1]: | |
hb += 1 | |
lb = center | |
while X[lb] >= X[lb - 1]: | |
lb -= 1 | |
X = X[lb:hb+1] | |
X = 20 * np.log10(abs(X) + eps) | |
return X | |
if __name__ == '__main__': | |
import loadTestCases | |
import matplotlib.pyplot as plt | |
nCases = 3 | |
for i in range(nCases): | |
testcase = loadTestCases.load(1, i + 1) | |
samples = extractMainLobe(**testcase['input']) | |
w = get_window(testcase['input']['window'], testcase['input']['M']) | |
plt.subplot(3, nCases, i + nCases * 0 + 1) | |
plt.plot(w) | |
plt.title(f'Input ({testcase["input"]["window"]})') | |
plt.subplot(3, nCases, i + nCases * 1 + 1) | |
plt.plot(samples) | |
plt.title('Output samples') | |
plt.subplot(3, nCases, i + nCases * 2 + 1) | |
plt.plot(testcase['output']) | |
plt.title('Expected output samples') | |
plt.show() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import numpy as np | |
import math | |
from scipy.signal import get_window | |
import matplotlib.pyplot as plt | |
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../software/models/')) | |
import stft | |
import utilFunctions as UF | |
eps = np.finfo(float).eps | |
""" | |
A4-Part-2: Measuring noise in the reconstructed signal using the STFT model | |
Write a function that measures the amount of noise introduced during the analysis and synthesis of a | |
signal using the STFT model. Use SNR (signal to noise ratio) in dB to quantify the amount of noise. | |
Use the stft() function in stft.py to do an analysis followed by a synthesis of the input signal. | |
A brief description of the SNR computation can be found in the pdf document (A4-STFT.pdf, in Relevant | |
Concepts section) in the assignment directory (A4). Use the time domain energy definition to compute | |
the SNR. | |
With the input signal and the obtained output, compute two different SNR values for the following cases: | |
1) SNR1: Over the entire length of the input and the output signals. | |
2) SNR2: For the segment of the signals left after discarding M samples from both the start and the | |
end, where M is the analysis window length. Note that this computation is done after STFT analysis | |
and synthesis. | |
The input arguments to the function are the wav file name including the path (inputFile), window | |
type (window), window length (M), FFT size (N), and hop size (H). The function should return a python | |
tuple of both the SNR values in decibels: (SNR1, SNR2). Both SNR1 and SNR2 are float values. | |
Test case 1: If you run your code using piano.wav file with 'blackman' window, M = 513, N = 2048 and | |
H = 128, the output SNR values should be around: (67.57748352378475, 304.68394693221649). | |
Test case 2: If you run your code using sax-phrase-short.wav file with 'hamming' window, M = 512, | |
N = 1024 and H = 64, the output SNR values should be around: (89.510506656299285, 306.18696700251388). | |
Test case 3: If you run your code using rain.wav file with 'hann' window, M = 1024, N = 2048 and | |
H = 128, the output SNR values should be around: (74.631476225366825, 304.26918192997738). | |
Due to precision differences on different machines/hardware, compared to the expected SNR values, your | |
output values can differ by +/-10dB for SNR1 and +/-100dB for SNR2. | |
""" | |
def computeSNR(inputFile, window, M, N, H): | |
""" | |
Input: | |
inputFile (string): wav file name including the path | |
window (string): analysis window type (choice of rectangular, triangular, hanning, hamming, | |
blackman, blackmanharris) | |
M (integer): analysis window length (odd positive integer) | |
N (integer): fft size (power of two, > M) | |
H (integer): hop size for the stft computation | |
Output: | |
The function should return a python tuple of both the SNR values (SNR1, SNR2) | |
SNR1 and SNR2 are floats. | |
""" | |
## your code here | |
(fs, x) = UF.wavread(inputFile) | |
w = get_window(window, M, False) | |
y = stft.stft(x, w, N, H) | |
noise = y - x | |
SNR1 = 10 * np.log10(np.sum(x**2) / np.sum(noise**2)) | |
trimmed_x = x[M:-M] | |
trimmed_noise = noise[M:-M] | |
# print(M) | |
# print(x.shape) | |
# print(trimmed_x.shape) | |
# | |
# print(f'E x: {np.sum(x**2)}') | |
# print(f'E trimmed x: {np.sum(trimmed_x**2)}') | |
# | |
# print(f'E noise: {np.sum(noise**2)}') | |
# print(f'E trimmed noise: {np.sum(trimmed_noise**2)}') | |
# | |
# plt.subplot(3, 2, 1) | |
# plt.plot(x[:1000]) | |
# plt.subplot(3, 2, 3) | |
# plt.plot(y[:1000]) | |
# plt.subplot(3, 2, 5) | |
# plt.plot(noise[:1000]) | |
# | |
# plt.subplot(3, 2, 2) | |
# plt.plot(trimmed_x[:1000]) | |
# plt.subplot(3, 2, 4) | |
# plt.plot(trimmed_y[:1000]) | |
# plt.subplot(3, 2, 6) | |
# plt.plot(trimmed_noise[:1000]) | |
# plt.show() | |
# | |
# | |
# exit(1) | |
# | |
SNR2 = 10 * np.log10(np.sum(trimmed_x**2) / np.sum(trimmed_noise**2)) | |
return (SNR1, SNR2) | |
if __name__ == '__main__': | |
import loadTestCases | |
import matplotlib.pyplot as plt | |
nCases = 3 | |
for i in range(nCases): | |
testcase = loadTestCases.load(2, i + 1) | |
(SNR1, SNR2) = computeSNR(**testcase['input']) | |
print(f'Test case {i + 1}') | |
print(f'inputFile: {testcase["input"]}') | |
print(f'Expected: {testcase["output"]}') | |
print(f'Actual: {(SNR1, SNR2)}') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import numpy as np | |
from scipy.signal import get_window | |
import matplotlib.pyplot as plt | |
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../software/models/')) | |
import stft | |
import utilFunctions as UF | |
eps = np.finfo(float).eps | |
""" | |
A4-Part-3: Computing band-wise energy envelopes of a signal | |
Write a function that computes band-wise energy envelopes of a given audio signal by using the STFT. | |
Consider two frequency bands for this question, low and high. The low frequency band is the set of | |
all the frequencies between 0 and 3000 Hz and the high frequency band is the set of all the | |
frequencies between 3000 and 10000 Hz (excluding the boundary frequencies in both the cases). | |
At a given frame, the value of the energy envelope of a band can be computed as the sum of squared | |
values of all the frequency coefficients in that band. Compute the energy envelopes in decibels. | |
Refer to "A4-STFT.pdf" document for further details on computing bandwise energy. | |
The input arguments to the function are the wav file name including the path (inputFile), window | |
type (window), window length (M), FFT size (N) and hop size (H). The function should return a numpy | |
array with two columns, where the first column is the energy envelope of the low frequency band and | |
the second column is that of the high frequency band. | |
Use stft.stftAnal() to obtain the STFT magnitude spectrum for all the audio frames. Then compute two | |
energy values for each frequency band specified. While calculating frequency bins for each frequency | |
band, consider only the bins that are within the specified frequency range. For example, for the low | |
frequency band consider only the bins with frequency > 0 Hz and < 3000 Hz (you can use np.where() to | |
find those bin indexes). This way we also remove the DC offset in the signal in energy envelope | |
computation. The frequency corresponding to the bin index k can be computed as k*fs/N, where fs is | |
the sampling rate of the signal. | |
To get a better understanding of the energy envelope and its characteristics you can plot the envelopes | |
together with the spectrogram of the signal. You can use matplotlib plotting library for this purpose. | |
To visualize the spectrogram of a signal, a good option is to use colormesh. You can reuse the code in | |
sms-tools/lectures/4-STFT/plots-code/spectrogram.py. Either overlay the envelopes on the spectrogram | |
or plot them in a different subplot. Make sure you use the same range of the x-axis for both the | |
spectrogram and the energy envelopes. | |
NOTE: Running these test cases might take a few seconds depending on your hardware. | |
Test case 1: Use piano.wav file with window = 'blackman', M = 513, N = 1024 and H = 128 as input. | |
The bin indexes of the low frequency band span from 1 to 69 (69 samples) and of the high frequency | |
band span from 70 to 232 (163 samples). To numerically compare your output, use loadTestCases.py | |
script to obtain the expected output. | |
Test case 2: Use piano.wav file with window = 'blackman', M = 2047, N = 4096 and H = 128 as input. | |
The bin indexes of the low frequency band span from 1 to 278 (278 samples) and of the high frequency | |
band span from 279 to 928 (650 samples). To numerically compare your output, use loadTestCases.py | |
script to obtain the expected output. | |
Test case 3: Use sax-phrase-short.wav file with window = 'hamming', M = 513, N = 2048 and H = 256 as | |
input. The bin indexes of the low frequency band span from 1 to 139 (139 samples) and of the high | |
frequency band span from 140 to 464 (325 samples). To numerically compare your output, use | |
loadTestCases.py script to obtain the expected output. | |
In addition to comparing results with the expected output, you can also plot your output for these | |
test cases.You can clearly notice the sharp attacks and decay of the piano notes for test case 1 | |
(See figure in the accompanying pdf). You can compare this with the output from test case 2 that | |
uses a larger window. You can infer the influence of window size on sharpness of the note attacks | |
and discuss it on the forums. | |
""" | |
def computeEngEnv(inputFile, window, M, N, H): | |
""" | |
Inputs: | |
inputFile (string): input sound file (monophonic with sampling rate of 44100) | |
window (string): analysis window type (choice of rectangular, triangular, hanning, | |
hamming, blackman, blackmanharris) | |
M (integer): analysis window size (odd positive integer) | |
N (integer): FFT size (power of 2, such that N > M) | |
H (integer): hop size for the stft computation | |
Output: | |
The function should return a numpy array engEnv with shape Kx2, K = Number of frames | |
containing energy envelop of the signal in decibles (dB) scale | |
engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB) | |
engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB) | |
""" | |
### your code here | |
(fs, x) = UF.wavread(inputFile) | |
w = get_window(window, M, False) | |
mX, pX = stft.stftAnal(x, w, N, H) | |
mX = 10 ** (mX / 20) | |
# Get the frequency bins | |
freq_per_bin = fs / N | |
lfb = np.ceil(3000 / freq_per_bin) | |
hfb = np.ceil(10000 / freq_per_bin) | |
low_freq_bins = np.arange(1, lfb, dtype=int) | |
high_freq_bins = np.arange(lfb, hfb, dtype=int) | |
#print(freq_per_bin) | |
#print(lfb, hfb) | |
#print(low_freq_bins) | |
#print(low_freq_bins * freq_per_bin) | |
#print(high_freq_bins) | |
#print(high_freq_bins * freq_per_bin) | |
low_band = mX[:, low_freq_bins] | |
high_band = mX[:, high_freq_bins] | |
# Compute the energy envelope | |
low_band_env = 10 * np.log10(np.sum(np.abs(low_band) ** 2, axis=1)) | |
high_band_env = 10 * np.log10(np.sum(np.abs(high_band) ** 2, axis=1)) | |
engEnv = (np.vstack((low_band_env, high_band_env))).T | |
return engEnv | |
if __name__ == '__main__': | |
import loadTestCases | |
import matplotlib.pyplot as plt | |
nCases = 3 | |
f, axs = plt.subplots(3, nCases, figsize=(16, 8)) | |
f.tight_layout(h_pad=3, w_pad=3) | |
for i in range(nCases): | |
testcase = loadTestCases.load(3, i + 1) | |
engEnv = computeEngEnv(**testcase['input']) | |
print(f'Test case {i+1}:') | |
print(f'Actual output: {engEnv}') | |
print(f'Expected output: {testcase["output"]}') | |
axs[0][i].plot(engEnv[:, 0], label='Low Band') | |
axs[0][i].plot(engEnv[:, 1], label='High Band') | |
axs[0][i].set_xlabel(f'Frame number') | |
axs[0][i].set_ylabel(f'Energy (dB)') | |
axs[0][i].set_title(f'Test case {i+1}: Actual') | |
axs[0][i].legend() | |
axs[1][i].plot(testcase['output'][:, 0], label='Low Band') | |
axs[1][i].plot(testcase['output'][:, 1], label='High Band') | |
axs[1][i].set_xlabel(f'Frame number') | |
axs[1][i].set_ylabel(f'Energy (dB)') | |
axs[1][i].set_title(f'Test case {i+1}: Expected') | |
axs[1][i].legend() | |
axs[2][i].plot(engEnv[:, 0] - testcase['output'][:, 0], label='Low Band') | |
axs[2][i].plot(engEnv[:, 1] - testcase['output'][:, 1], label='High Band') | |
axs[2][i].set_xlabel(f'Frame number') | |
axs[2][i].set_ylabel(f'Energy (dB)') | |
axs[2][i].set_title(f'Error') | |
axs[2][i].legend() | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
token: RyO0zLvI8u0fREQ9