-
Notifications
You must be signed in to change notification settings - Fork 0
/
realtime.py
180 lines (137 loc) · 5.46 KB
/
realtime.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import pyaudio
import numpy as np
import pyfftw
import matplotlib.pyplot as plt
# --- Capture settings for the live audio input ---
FORMAT = pyaudio.paInt16  # 16-bit PCM sample format
CHANNELS = 1  # mono
RATE = 44100  # sampling rate in Hz (44.1 kHz)
CHUNK = 4096  # number of frames per audio chunk

# Create the PyAudio instance that owns the input stream.
audio = pyaudio.PyAudio()

# Open an input stream configured with the settings above.
stream = audio.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
)
# ----------- Load MP3 file
from pydub import AudioSegment

audio_file = "3.mp3"
audio_segment = AudioSegment.from_file(audio_file)

# Flatten the decoded audio into a NumPy array of raw sample values and
# remember the rate it was recorded at.
# NOTE(review): pydub documents that multi-channel audio comes back with the
# channels interleaved -- confirm the input is mono, or down-mix it first.
samples = np.array(audio_segment.get_array_of_samples())
sample_rate = audio_segment.frame_rate

# Number of complete CHUNK-sized windows; a trailing partial window is dropped.
num_chunks = samples.shape[0] // CHUNK
notes = list()
# ------------ End Load MP3

print("Recording...")
prev_note = 0
# Pitch-class names in ascending semitone order, starting at C.
note_names = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
A4 = 440.0  # Frequency of A4 in Hz (reference pitch)
# C0 sits 57 semitones (4.75 octaves) below A4.
C0 = A4 * np.power(2, -4.75)


def freq_to_note_name(freq):
    """Return the pitch-class name nearest to *freq* (octave is discarded).

    Args:
        freq: Frequency in Hz.

    Returns:
        One of the entries of ``note_names`` (e.g. ``"A"`` for 440 Hz), or
        ``None`` when *freq* is zero, negative, NaN or infinite and therefore
        has no meaningful pitch.
    """
    # log2 is undefined for non-positive values and NaN/inf cannot be rounded
    # to an int, so anything that is not a finite positive frequency is
    # reported as "no note" instead of raising.
    if not np.isfinite(freq) or freq <= 0:
        return None
    # Distance from C0 in (fractional) semitones, snapped to the nearest one.
    semitones_above_c0 = 12 * np.log2(freq / C0)
    return note_names[int(round(semitones_above_c0)) % 12]
# Note sequences that spell out a word.  Each entry is a mutable list of
#   [word, index of the step expected next, list of steps]
# where every step is the set of note names expected in one audio chunk.
# An empty step has no notes of its own; the matching loop handles it
# separately.  The index (element 1) is advanced and reset in place by the
# matching loop, so the entries must stay lists rather than tuples.
word_patterns = [
    [
        "four",
        0,
        [
            ["G#", "G", "E", "C", "B"],
            [],
            ["G#", "G", "E", "C", "B"],
        ],
    ],
    [
        "three",
        0,
        [
            ["F", "E", "D"],
            ["F", "D"],
            ["G", "F", "D", "A#"],
            [],
            ["G", "F", "D", "A#"],
        ],
    ],
]
# Analyse the decoded MP3 one CHUNK-sized window at a time: take the FFT of
# each window, reduce its strong spectral peaks to pitch-class names, and step
# every entry of word_patterns forward when the detected notes match.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been stripped -- confirm that the word matching and
# the plot are meant to run only for windows that pass the loudness check.

# Every window holds exactly CHUNK samples, so the frequency axis is the same
# for all of them and is built once.  It is derived from the MP3's own sample
# rate (not the microphone RATE) so that bins map to the right frequencies
# whatever rate the file was encoded at.
fft_freq = np.fft.fftfreq(CHUNK, 1.0 / sample_rate)

try:
    # Live-microphone alternative to the MP3 loop below:
    #   while True:
    #       data = stream.read(CHUNK)
    #       audio_data = np.frombuffer(data, dtype=np.int16)
    for i in range(num_chunks):
        # Current window of the MP3 samples.
        audio_data = samples[i * CHUNK : (i + 1) * CHUNK]

        # Magnitude spectrum of the window (FFT computed through FFTW).
        fft_result = pyfftw.interfaces.numpy_fft.fft(audio_data)
        magnitude = np.abs(fft_result)

        # Strongest bin; abs() folds the negative-frequency half onto the
        # positive one.
        peak_index = np.argmax(magnitude)
        peak_freq = abs(fft_freq[peak_index])

        # Keep every bin whose magnitude exceeds 30 % of the strongest one.
        threshold = magnitude[peak_index] * 0.3
        significant_indices = np.where(magnitude > threshold)[0]

        # (frequency, magnitude) pairs, loudest first.
        peak_freqs = [(abs(fft_freq[k]), magnitude[k]) for k in significant_indices]
        peak_freqs.sort(key=lambda pair: pair[1], reverse=True)

        # One (magnitude, frequency) entry per note name.
        loudest_notes = {}
        for freq, mag in peak_freqs:
            note = freq_to_note_name(freq)
            if note and (note not in loudest_notes or mag > loudest_notes[note][0]):
                loudest_notes[note] = (int(mag), int(freq))

        # Ordered by note name (descending); the order only affects the
        # printed line because matching compares sorted name lists.
        loudest_notes_sorted = sorted(
            loudest_notes.items(), key=lambda item: item[0], reverse=True
        )
        peak_notes = loudest_notes_sorted[:12]
        peak_notes_only = [note for note, _ in peak_notes]

        # Skip quiet windows and windows dominated by sub-50 Hz content.
        # (The original author noted a magnitude threshold of 10000 for the
        # live stream instead of 1000000.)
        if not (magnitude[peak_index] > 1000000 and peak_freq > 50):
            continue

        print(
            f"Detected Notes: {str(peak_notes):<85}, Peak Frequency: {peak_freq:7.2f} Hz"
        )

        detected = sorted(peak_notes_only)
        for word_obj in word_patterns:
            word, step, steps = word_obj
            expected = steps[step]
            # A step is satisfied when the detected notes equal it, or -- for
            # an empty step -- when the notes differ from the preceding step
            # (index -1 wraps to the last step while step is 0).
            matches_step = sorted(expected) == detected
            passes_gap = expected == [] and sorted(steps[step - 1]) != detected
            if matches_step or passes_gap:
                print(f"{word} {step}")
                word_obj[1] += 1
            # All steps seen: report the word and start over.
            if len(steps) == word_obj[1]:
                print("WOOO WORD " + word + " FOUND")
                word_obj[1] = 0

        # Plot the non-negative half of the spectrum for this window.
        # plt.show() blocks until the figure window is closed.
        plt.plot(fft_freq[: len(fft_freq) // 2], magnitude[: len(magnitude) // 2])
        plt.xlim(0, 500)
        plt.ylim(0, 10000000)
        plt.xlabel("Frequency (Hz)")
        plt.ylabel("Magnitude")
        plt.title(
            f"FFT of MP3 File: {audio_file} from {i * CHUNK} to {(i + 1) * CHUNK}"
        )
        plt.show()
except KeyboardInterrupt:
    # Ctrl-C is the expected way to stop early; fall through to cleanup.
    pass
finally:
    # Release the audio device even if the analysis raised unexpectedly.
    stream.stop_stream()
    stream.close()
    audio.terminate()
print("Finished recording")