Build a Smart Bluetooth Speaker with Arduino
DIY Arduino-powered Bluetooth speaker with voice commands. Step-by-step electronics project.
🎵 Building the Smart Speaker of Your Dreams
Tired of overpriced smart speakers that spy on you? Let's build one that respects your privacy, sounds amazing, and costs under $50. Plus, it has features even Amazon can't offer!
🎯 What We're Building:
- Bluetooth 5.0: Crystal clear wireless audio
- Voice Commands: Offline speech recognition
- RGB Lighting: Beat-synced LEDs
- Touch Controls: Tap to play/pause, swipe for volume
- Smart Features: Weather, timer, music control
- Privacy First: No cloud connections required
🔧 Components Shopping List
| Component | Model/Specs | Price | Why This One? |
|---|---|---|---|
| Microcontroller 🧠 | ESP32-S3 | $8 | Built-in WiFi, Bluetooth, AI acceleration |
| Audio Amplifier | MAX98357A I2S | $6 | High-quality digital amplifier |
| Speakers 🔊 | 2x 4Ω 3W Full Range | $12 | Stereo sound, good bass response |
| Microphone | INMP441 I2S MEMS | $5 | Digital, low noise, omnidirectional |
| Touch Sensor | TTP229 16-Channel | $4 | Capacitive touch, gesture detection |
| LED Strip ✨ | WS2812B (24 LEDs) | $6 | Addressable RGB, music visualization |
| Display | 0.96" OLED I2C | $4 | Show time, weather, track info |
| Power Supply | 5V 3A USB-C | $5 | Stable power for audio quality |
💰 Total Project Cost: ~$50 (vs $200+ for equivalent commercial speaker)
⚡ Circuit Design & Wiring
Main Board Connections
ESP32-S3 Pin Connections:
┌─────────────────────────────────────────┐
│ Audio System:                           │
│ GPIO1  → MAX98357A DIN (I2S Data)       │
│ GPIO2  → MAX98357A BCLK (I2S Clock)     │
│ GPIO42 → MAX98357A LRC (I2S Word Select)│
│ 5V     → MAX98357A VIN                  │
│ GND    → MAX98357A GND                  │
│                                         │
│ Microphone:                             │
│ GPIO4  → INMP441 SD (I2S Data)          │
│ GPIO5  → INMP441 SCK (I2S Clock)        │
│ GPIO6  → INMP441 WS (I2S Word Select)   │
│ 3.3V   → INMP441 VDD                    │
│                                         │
│ Display & Controls:                     │
│ GPIO8  → OLED SDA (I2C Data)            │
│ GPIO9  → OLED SCL (I2C Clock)           │
│ GPIO10 → Touch Sensor SDO               │
│ GPIO11 → Touch Sensor SCL               │
│                                         │
│ LED Strip:                              │
│ GPIO12 → WS2812B Data In                │
│ 5V     → WS2812B Power                  │
│ GND    → WS2812B Ground                 │
└─────────────────────────────────────────┘
Power Distribution
⚡ Power Management:
- ESP32-S3: 3.3V from onboard regulator
- Audio Amplifier: 5V direct from USB-C (high power)
- LED Strip: 5V with current limiting resistor
- Digital Components: 3.3V from ESP32 regulator
- Power Consumption: ~1.5A peak (music + full LED brightness)
💻 Software Architecture
Main Application Code
#include "WiFi.h"
#include "HTTPClient.h"
#include "BluetoothA2DPSink.h"
#include "ESP32-audioI2S.h"
#include "FastLED.h"
#include "SSD1306.h"
#include "ArduinoJson.h"
// Hardware pin definitions (GPIO numbers; they mirror the wiring table earlier in this article)
// I2S data line to the MAX98357A amplifier (DIN)
#define I2S_DOUT 1
// I2S bit clock to the amplifier (BCLK)
#define I2S_BCLK 2
// I2S word-select line to the amplifier (LRC)
#define I2S_LRC 42
// Data input of the WS2812B LED strip
#define LED_PIN 12
// Number of LEDs on the strip; also sizes the `leds` frame buffer below
#define LED_COUNT 24
// Pin sampled with touchRead() by the touch-control code
#define TOUCH_PIN 10
// Global objects shared by all of the classes in this sketch
// Bluetooth A2DP receiver; configured and started in SmartSpeaker::setupBluetooth()
BluetoothA2DPSink a2dp_sink;
// Audio output object; pins and volume are set in SmartSpeaker::setupAudio()
Audio audio;
// One colour entry per LED, pushed to the strip by FastLED.show()
CRGB leds[LED_COUNT];
// OLED display; 0x3C is presumably the I2C address and 8/9 the SDA/SCL pins
// (the wiring table lists GPIO8 = SDA, GPIO9 = SCL) - confirm against the SSD1306 library constructor
SSD1306 display(0x3C, 8, 9);
class SmartSpeaker {
public:
void setup() {
Serial.begin(115200);
// Initialize audio system
setupAudio();
// Initialize Bluetooth
setupBluetooth();
// Initialize LED strip
FastLED.addLeds(leds, LED_COUNT);
// Initialize display
display.init();
display.setFont(ArialMT_Plain_16);
// Initialize touch controls
setupTouchControls();
// Initialize voice recognition
setupVoiceCommands();
Serial.println("Smart Speaker initialized!");
displayStatus("Ready to Rock!");
}
void loop() {
// Handle audio visualization
processAudioVisualization();
// Check for touch input
processTouchControls();
// Process voice commands
processVoiceCommands();
// Update display
updateDisplay();
// Handle any pending tasks
delay(10);
}
private:
void setupAudio() {
// Configure I2S audio output
audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT);
audio.setVolume(15); // 0-21
// Audio event callbacks
audio.setAudioTaskCore(1);
audio.setAudioTaskPrio(1);
}
void setupBluetooth() {
i2s_config_t i2s_config = {
.mode = (i2s_mode_t) (I2S_MODE_MASTER | I2S_MODE_TX),
.sample_rate = 44100,
.bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
.channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT,
.communication_format = I2S_COMM_FORMAT_STAND_MSB,
.intr_alloc_flags = 0,
.dma_buf_count = 8,
.dma_buf_len = 64,
.use_apll = false
};
a2dp_sink.set_i2s_config(i2s_config);
a2dp_sink.start("TechGeek Speaker");
}
void processAudioVisualization() {
// Get current audio levels
float audioLevel = getCurrentAudioLevel();
// Map audio to LED colors and patterns
if (audioLevel > 0.1) {
visualizeBeats(audioLevel);
} else {
showIdleAnimation();
}
FastLED.show();
}
void visualizeBeats(float level) {
static uint8_t hue = 0;
// Create beat-responsive visualization
for(int i = 0; i < LED_COUNT; i++) {
float distance = abs(i - (LED_COUNT/2));
float brightness = max(0.0f, level * 255 - (distance * 20));
leds[i] = CHSV(hue + (i * 10), 255, (uint8_t)brightness);
}
hue += 2; // Slowly cycle through colors
}
};
Voice Command System
#include "esp_sr_models.h"
#include "esp_wn_models.h"
class VoiceCommands {
private:
const char* wake_word = "hey speaker";
const char* commands[] = {
"play music",
"pause",
"volume up",
"volume down",
"next song",
"previous song",
"what time is it",
"how's the weather",
"lights off",
"lights on"
};
public:
void setup() {
// Initialize speech recognition
ESP_ERROR_CHECK(esp_sr_init(ESP_SR_MODE_SR));
// Set wake word model
sr_setup_wake_word();
// Setup command recognition
sr_setup_commands();
}
void processCommands() {
if (sr_detect_wake_word()) {
playAcknowledgment(); // Beep sound
displayStatus("Listening...");
// Listen for command (3 second timeout)
String command = sr_recognize_command(3000);
if (command != "") {
executeCommand(command);
} else {
displayStatus("Didn't catch that");
}
}
}
void executeCommand(String command) {
if (command.indexOf("play") >= 0) {
bluetoothPlay();
displayStatus("Playing music");
}
else if (command.indexOf("pause") >= 0) {
bluetoothPause();
displayStatus("Music paused");
}
else if (command.indexOf("volume up") >= 0) {
adjustVolume(+2);
displayStatus("Volume up");
}
else if (command.indexOf("volume down") >= 0) {
adjustVolume(-2);
displayStatus("Volume down");
}
else if (command.indexOf("time") >= 0) {
displayCurrentTime();
}
else if (command.indexOf("weather") >= 0) {
getWeatherReport();
}
else if (command.indexOf("lights off") >= 0) {
FastLED.clear();
FastLED.show();
displayStatus("Lights off");
}
else if (command.indexOf("lights on") >= 0) {
showIdleAnimation();
displayStatus("Lights on");
}
}
};
🎨 Advanced Features Implementation
1. Touch Gesture Controls
class TouchControls {
private:
int lastTouchValue = 0;
unsigned long lastTouchTime = 0;
public:
void processTouchInput() {
int touchValue = touchRead(TOUCH_PIN);
unsigned long currentTime = millis();
// Detect touch gestures
if (touchValue < 30 && lastTouchValue > 30) { // Touch started
detectGesture(currentTime - lastTouchTime);
}
lastTouchValue = touchValue;
lastTouchTime = currentTime;
}
void detectGesture(unsigned long duration) {
if (duration < 300) {
// Quick tap = Play/Pause
togglePlayback();
}
else if (duration < 1000) {
// Medium press = Skip track
skipTrack();
}
else {
// Long press = Enter pairing mode
enterPairingMode();
}
}
// Capacitive touch zones (if using touch pad)
void handleTouchZones() {
uint16_t touchStatus = readTouchPad();
// Left side = Previous track
if (touchStatus & 0x01) {
previousTrack();
}
// Right side = Next track
else if (touchStatus & 0x02) {
nextTrack();
}
// Top = Volume up
else if (touchStatus & 0x04) {
adjustVolume(+1);
}
// Bottom = Volume down
else if (touchStatus & 0x08) {
adjustVolume(-1);
}
}
};
2. Weather Integration
class WeatherService {
private:
const char* api_key = "YOUR_OPENWEATHER_API_KEY";
const char* city = "San Francisco";
public:
void getWeatherReport() {
if (WiFi.status() == WL_CONNECTED) {
HTTPClient http;
String url = "http://api.openweathermap.org/data/2.5/weather?q=" +
String(city) + "&appid=" + String(api_key) + "&units=metric";
http.begin(url);
int httpCode = http.GET();
if (httpCode == 200) {
String payload = http.getString();
parseWeatherData(payload);
}
http.end();
} else {
displayStatus("No WiFi connection");
}
}
void parseWeatherData(String json) {
DynamicJsonDocument doc(1024);
deserializeJson(doc, json);
int temp = doc["main"]["temp"];
const char* description = doc["weather"][0]["description"];
int humidity = doc["main"]["humidity"];
// Display weather info
display.clear();
display.drawString(0, 0, String(temp) + "Β°C");
display.drawString(0, 20, String(description));
display.drawString(0, 40, "Humidity: " + String(humidity) + "%");
display.display();
// Optional: Announce weather via text-to-speech
speakWeather(temp, description);
}
void speakWeather(int temp, const char* desc) {
String announcement = "Current temperature is " + String(temp) +
" degrees celsius. " + String(desc);
// Use ESP32 speech synthesis (if available)
// or play pre-recorded audio clips
textToSpeech(announcement);
}
};
3. Music Visualization Algorithms
/**
 * Picks and renders an LED visualisation from the current audio spectrum.
 *
 * sampleAudio(), performFFT(), detectBass() and showBassVisualization() are
 * not defined in this listing; they are expected to fill and interpret the
 * buffers declared below.
 */
class AudioVisualizer {
private:
    static constexpr int kSampleCount = 64;        // time-domain samples per frame
    static constexpr int kFftBins = 32;            // frequency bins in fftOutput
    static constexpr int kBassBins = 4;            // lowest bins summed for beat detection
    static constexpr float kBeatThreshold = 0.3f;  // rise in bass energy that counts as a beat

    // Zero-initialised so nothing reads indeterminate values before the first FFT.
    float audioSamples[kSampleCount] = {};
    float fftOutput[kFftBins] = {};

    // Per-instance state (previously function-local statics shared by all instances).
    float lastBassEnergy = 0.0f;
    uint8_t beatHue = 0;

public:
    /** Samples audio, runs the FFT and renders one visualisation frame. */
    void updateVisualization() {
        // Sample current audio
        sampleAudio();

        // Perform FFT for frequency analysis
        performFFT();

        // Choose visualization based on music characteristics
        if (detectBeat()) {
            showBeatDrop();
        } else if (detectBass()) {
            showBassVisualization();
        } else {
            showSpectrumAnalyzer();
        }
    }

    /**
     * Spreads the FFT bins across the strip: hue follows the bin index and
     * brightness follows the bin magnitude (1.0 = full brightness). Only
     * writes the LED buffer; it does not call FastLED.show().
     */
    void showSpectrumAnalyzer() {
        for (int i = 0; i < LED_COUNT; i++) {
            const int freqBin = map(i, 0, LED_COUNT - 1, 0, kFftBins - 1);
            const float magnitude = fftOutput[freqBin];

            // Color based on frequency
            const uint8_t hue = map(freqBin, 0, kFftBins - 1, 0, 255);
            const uint8_t brightness = constrain(magnitude * 255, 0, 255);
            leds[i] = CHSV(hue, 255, brightness);
        }
    }

    /**
     * Flashes the whole strip in the current beat colour, holds it for 50 ms
     * (blocking), then dims the buffer. The dimmed frame is only written to
     * the LED buffer here, not pushed to the strip.
     */
    void showBeatDrop() {
        // Flash all LEDs on beat
        fill_solid(leds, LED_COUNT, CHSV(beatHue, 255, 255));
        FastLED.show();
        delay(50);

        // Fade out
        fadeToBlackBy(leds, LED_COUNT, 180);
        beatHue += 30; // Change color each beat
    }

    /**
     * Simple beat detection using bass frequencies.
     *
     * @return true when the summed energy of the lowest bins rose by more
     *         than the beat threshold since the previous call.
     */
    bool detectBeat() {
        float bassEnergy = 0;
        for (int i = 0; i < kBassBins; i++) {
            bassEnergy += fftOutput[i];
        }

        const float energyDiff = bassEnergy - lastBassEnergy;
        lastBassEnergy = bassEnergy;
        return (energyDiff > kBeatThreshold);
    }
};
📱 Mobile App Integration
Create a companion app for advanced control:
📱 App Features:
- EQ Controls: Custom audio equalization
- Light Patterns: Create custom LED animations
- Voice Training: Add custom wake words and commands
- Timer & Alarms: Schedule music and reminders
- Multi-Room: Sync multiple speakers
- OTA Updates: Update firmware wirelessly
🏠 3D Printed Enclosure
🎨 Design Considerations:
- Acoustic Ports: Tuned for optimal bass response
- Heat Dissipation: Vents for ESP32 and amplifier cooling
- LED Diffusion: Translucent sections for even light distribution
- Touch Surface: Smooth area for capacitive touch controls
- Cable Management: Hidden power and optional aux input
⚙️ Performance Optimizations
🛠️ Troubleshooting Guide
| Problem | Possible Cause | Solution |
|---|---|---|
| No Audio Output | Wiring, power, or I2S config | Check connections, verify 5V to amp |
| Bluetooth Won't Pair | Device in pairing mode already | Reset ESP32, clear Bluetooth cache |
| LEDs Not Working | Power or data signal issue | Check 5V power, add 330Ω resistor |
| Voice Commands Fail | Microphone placement/noise | Position mic away from speakers |
You now have a smart speaker that sounds better than most commercial options, respects your privacy, and costs a fraction of the price! 🎵🔊
Next Steps: Share your build on social media! Tag us to show off your custom LED patterns and voice commands. The DIY audio community loves creative builds! 📸🎶
Tags
Related Articles
Robotics Demo: Ultrasonic Sensor Triggers Robot Car
6 min read
IoT for Pets: Smart Sensor Belt for Cats/Dogs
6 min read
How to Integrate AI in Robotics: Simple Architecture Explained
7 min read
💡 Want to learn more?
Explore our comprehensive courses on AI, programming, and robotics.
Browse Courses