Home

A Helldiver soldier noticed that hot water and cold water sound different when they come out of a faucet.¶

In [1]:
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Audio
from pydub import AudioSegment
import warnings

warnings.filterwarnings("ignore")

Description of input files:¶

Coldhotcold.m4a: the water is cold before 6 seconds, hot between 6 and 12.5 seconds, and cold again after 12.5 seconds.¶

Cold.m4a and Hot.m4a are the training files.¶

In [2]:
# Convert the cold-water recording from M4A to WAV, then show a player for the WAV copy.
cold_wav = "Cold.wav"
audio = AudioSegment.from_file("Cold.m4a", format="m4a")
audio.export(cold_wav, format="wav")
Audio(cold_wav)
Out[2]:
Your browser does not support the audio element.
In [3]:
# Convert the hot-water recording from M4A to WAV, then show a player for the WAV copy.
hot_wav = "Hot.wav"
audio = AudioSegment.from_file("Hot.m4a", format="m4a")
audio.export(hot_wav, format="wav")
Audio(hot_wav)
Out[3]:
Your browser does not support the audio element.

Simple Check:¶

The spectral centroid is very helpful, and the MFCCs give some clues at a glance.¶

MFCC layers 1, 4 and 7¶

In [4]:
# Coldhotcold.m4a: cold before 6 s, hot between 6 s and 12.5 s, cold again after 12.5 s.
filename = 'Coldhotcold.m4a'
y, sr = librosa.load(filename)
# Computed once and reused for the image extent, the x ticks and the horizontal markers.
duration = librosa.get_duration(y=y, sr=sr)

# Extract 8 MFCC coefficients; rows are coefficients, columns are frames.
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=8)
n_layers = mfccs.shape[0]

# Moving-average smoothing along time, applied to each coefficient row independently.
window_size = 5  # Adjust the window size for smoothing
kernel = np.ones(window_size) / window_size
smoothed_mfccs = np.apply_along_axis(
    lambda row: np.convolve(row, kernel, mode='same'), axis=1, arr=mfccs
)

plt.figure(figsize=(20, 4))
plt.imshow(smoothed_mfccs, aspect='auto', origin='lower', extent=[0, duration, 0, n_layers], cmap='tab20')

plt.colorbar(label="MFCC Coefficient Value")
plt.xlabel("Time (seconds)")
plt.ylabel("MFCC Coefficient Index")

plt.xticks(np.arange(0, duration, step=1))
# Vertical markers at the cold->hot (6 s) and hot->cold (12.5 s) transitions,
# spanning every plotted coefficient row rather than a hard-coded height.
plt.vlines([6, 12.5], 0, n_layers, color='red')
# Horizontal markers bracket the row drawn between y=3 and y=4 (row index 3).
plt.hlines([3, 4], 0, duration, color='red')
plt.title("MFCCs Over Time")

plt.show()
No description has been provided for this image

Spectral centroid of the sound¶

In [5]:
# Load an audio signal
filename = 'Coldhotcold.m4a'
y, sr = librosa.load(filename)

# Compute the spectral centroid (first row of the returned array, one value per frame)
spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]

# Smooth the spectral centroid using a moving average.
# mode='valid' keeps only windows that lie fully inside the signal, so the
# result is window_size - 1 samples shorter than the input.
window_size = 50  # Size of the sliding window
smoothed_centroid = np.convolve(spectral_centroids, np.ones(window_size) / window_size, mode='valid')

# Convert frame indices to time
frames = range(len(spectral_centroids))
time = librosa.frames_to_time(frames, sr=sr)

# smoothed_centroid[i] is the mean of frames i .. i + window_size - 1, so it is
# stamped with the time at the centre of that window. Using time[:len(...)]
# would stamp it with the window's first frame and draw the smoothed curve
# about half a window too early relative to the 6 s / 12.5 s markers.
half_window = (window_size - 1) // 2
time_smooth = time[half_window:half_window + len(smoothed_centroid)]

# Plot the original and smoothed spectral centroid
plt.figure(figsize=(10, 6))
plt.plot(time, spectral_centroids, label='Original Spectral Centroid', alpha=0.6)
plt.plot(time_smooth, smoothed_centroid, label='Smoothed Spectral Centroid', color='red')
# Markers at the cold->hot (6 s) and hot->cold (12.5 s) transitions
plt.vlines(6, 0, 7000, color='red')
plt.vlines(12.5, 0, 7000, color='red')
plt.title('Spectral Centroid Smoothing using Moving Average')
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
plt.legend()
plt.show()
No description has been provided for this image

Spectrogram of the sound¶

In [6]:
import librosa
import librosa.display
import matplotlib.pyplot as plt

# Decode the cold -> hot -> cold recording into a waveform and its sample rate
filename = 'Coldhotcold.m4a'
y, sr = librosa.load(filename)

# STFT magnitude, converted to dB relative to the largest magnitude (ref=np.max)
stft_magnitude = np.abs(librosa.stft(y))
D = librosa.amplitude_to_db(stft_magnitude, ref=np.max)

# Draw the spectrogram on a linear frequency axis
plt.figure(figsize=(10, 6))
librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='linear', cmap='tab20')
# Green markers at the 6 s and 12.5 s temperature switches
for switch_time in (6, 12.5):
    plt.vlines(switch_time, 0, 10000, color='green')
plt.colorbar(format='%+2.0f dB')
plt.title('Spectrogram (Linear Frequency)')
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
plt.show()
No description has been provided for this image

Plot Histogram of Hot and Cold¶

In [7]:
from typing import List
import librosa

# Training recordings, with the legend label and histogram colour used for each.
filenames = ['Cold.m4a', 'Hot.m4a']
file_labels = ['Cold', 'Hot']
file_colors = ['blue', 'red']
file_numbers = len(filenames)
mfcc_layers = 8
# mfcc_list[file_index][layer_index] is the 1-D series of that MFCC coefficient
# over the frames of that file (file_index follows the order of `filenames`).
mfcc_list = []
durations: List[float] = []

window_size = 1  # Adjust the window size for smoothing (1 leaves the values unsmoothed)
for filename in filenames:
    y, sr = librosa.load(filename)
    durations.append(librosa.get_duration(y=y, sr=sr))
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=mfcc_layers)
    mfccs = np.apply_along_axis(
        lambda x: np.convolve(x, np.ones(window_size) / window_size, mode='same'), axis=1, arr=mfccs
    )
    mfcc_list.append([mfccs[j] for j in range(mfcc_layers)])

# Plot one histogram panel per MFCC layer, overlaying all files in each panel.
figs, axes = plt.subplots(nrows=mfcc_layers, ncols=1, figsize=(20, 5 * mfcc_layers))
# plt.subplots returns a bare Axes when there is a single panel; keep it indexable.
axes = np.atleast_1d(axes)
plt.subplots_adjust(hspace=0.6, wspace=0.5)
for i in range(mfcc_layers):
    for j in range(file_numbers):
        axes[i].hist(mfcc_list[j][i], bins=50, alpha=0.5, color=file_colors[j], label=file_labels[j])
    # Per-panel decorations are set once per layer, not once per file.
    axes[i].set_xlabel("MFCC Coefficient Value")
    axes[i].set_ylabel("Count")
    axes[i].set_title("MFCCs Histogram for Layer " + str(i + 1))
    # Without a legend the blue/red overlays cannot be told apart by the reader.
    axes[i].legend()

plt.show()
No description has been provided for this image

Scatter plot of MFCC layers 7 and 8 for cold and hot water¶

In [8]:
from copy import deepcopy
from sklearn.preprocessing import StandardScaler, MinMaxScaler

scaler = MinMaxScaler()
mfcc_copy = deepcopy(mfcc_list)

# 0-based row indices into mfcc_list: index 7 is layer 8, index 6 is layer 7.
x_layer = 7
y_layer = 6


def scale_layer_jointly(layer):
    """Min-max scale one MFCC layer using a single range shared by both classes.

    The scaler is fitted on the cold and hot values together. Fitting it on
    each class separately would stretch each class to [0, 1] on its own and
    hide any offset between the two clouds.

    Returns a ``(cold_scaled, hot_scaled)`` pair of 1-D arrays.
    """
    cold_values = np.asarray(mfcc_list[0][layer]).reshape(-1, 1)  # mfcc_list[0] is Cold
    hot_values = np.asarray(mfcc_list[1][layer]).reshape(-1, 1)   # mfcc_list[1] is Hot
    scaler.fit(np.vstack([cold_values, hot_values]))
    return scaler.transform(cold_values).ravel(), scaler.transform(hot_values).ravel()


cold_x, hot_x = scale_layer_jointly(x_layer)
cold_y, hot_y = scale_layer_jointly(y_layer)

plt.figure(figsize=(20, 20))
plt.scatter(hot_x, hot_y, color='red', label='Hot', alpha=0.5)
plt.scatter(cold_x, cold_y, color='blue', label='Cold', alpha=0.5)
plt.xlabel("MFCC layer " + str(x_layer + 1) + " (min-max scaled)")
plt.ylabel("MFCC layer " + str(y_layer + 1) + " (min-max scaled)")
# The labels passed to scatter are only shown once a legend is requested.
plt.legend()

plt.show()
No description has been provided for this image
In [9]:
import matplotlib.image as mpimg

# Show the saved '3Dshow.png' picture edge to edge, without axes or padding.
plt.figure(figsize=(10, 10))
image = mpimg.imread('3Dshow.png')  # Replace with your image path
plt.imshow(image)
image_axes = plt.gca()
plt.axis('off')  # Hide the axes
image_axes.set_axis_off()  # Same request made directly on the Axes object
plt.subplots_adjust(left=0, right=1, top=1, bottom=0)  # Let the axes fill the whole figure
plt.show()
No description has been provided for this image
In [10]:
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


# Function to extract MFCCs from an audio file
def extract_mfcc_features(filename, target: int, n_mfcc=8):
    """Build a per-frame MFCC table for one audio file.

    Parameters
    ----------
    filename
        Path of the audio file, passed to ``librosa.load``.
    target : int
        Class label written into every row's ``target`` column.
    n_mfcc : int, default 8
        Number of MFCC coefficients requested from ``librosa.feature.mfcc``.

    Returns
    -------
    pandas.DataFrame
        The transposed MFCC matrix (one row per frame, one integer-named
        column per coefficient) followed by a ``target`` column.
    """
    signal, sample_rate = librosa.load(filename)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    # Transpose so frames become rows, then append the label column.
    return pd.DataFrame(coefficients.T).assign(target=target)


# Load your data (list of audio filenames and their labels)
audio_files = ['Cold.m4a', 'Hot.m4a']  # Replace with your files
labels = [0, 1]  # Corresponding targets for each file (0 = cold, 1 = hot)
assert len(audio_files) == len(labels), "each audio file needs exactly one label"

# Extract features and prepare the dataset.
# The file/label lists above drive the extraction, so editing them is enough
# to train on other recordings.
per_file_frames = [
    extract_mfcc_features(path, target=label) for path, label in zip(audio_files, labels)
]
features = pd.concat(per_file_frames, ignore_index=True)
targets = features.pop('target')

# Split the dataset into training and testing sets.
# NOTE(review): rows are individual MFCC frames shuffled across the same
# recordings, so test frames sit next to training frames in time. The reported
# accuracy is likely optimistic for unseen recordings; confirm with a held-out file.
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.2, random_state=42, stratify=targets,
                                                    shuffle=True)

# Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# np.argsort sorts ascending, so the listing runs from the least important
# MFCC column to the most important one; the message states that explicitly.
indices = np.argsort(model.feature_importances_)
sorted_features = features.columns[indices]
print(f"mfcc layer importance (least to most important) is: {sorted_features}")
Accuracy: 0.9947780678851175

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      0.99       168
           1       0.99      1.00      1.00       215

    accuracy                           0.99       383
   macro avg       1.00      0.99      0.99       383
weighted avg       0.99      0.99      0.99       383

mfcc layer importance is: Index([2, 4, 1, 7, 0, 3, 6, 5], dtype='object')

Test the model on the Coldhotcold.m4a recording¶

In [11]:
# Classify every MFCC frame of the mixed cold -> hot -> cold recording.
filename = 'Coldhotcold.m4a'
n_mfcc = 8  # same coefficient count as the default used when building the training features
y, sr = librosa.load(filename)
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
# One row per frame, one column per coefficient, matching the training layout.
df = pd.DataFrame(np.transpose(mfccs))
y_pred = model.predict(df)
In [12]:
# One prediction per MFCC frame. frames_to_time derives each frame's time stamp
# from the sample rate and librosa's default hop length, replacing the rounded
# 0.023 s-per-frame constant whose error grows along the recording.
frame_times = librosa.frames_to_time(np.arange(len(y_pred)), sr=sr)

plt.figure(figsize=(20, 6))
plt.scatter(frame_times, y_pred)
# Expected cold->hot (6 s) and hot->cold (12.5 s) transitions, for comparison.
plt.vlines([6, 12.5], 0, 1, color='red')
plt.yticks([0, 1], ['Cold (0)', 'Hot (1)'])
plt.xlabel('Time (s)')
plt.ylabel('Predicted class')
plt.title('Per-frame prediction on Coldhotcold.m4a')
plt.show()
No description has been provided for this image