Zum Hauptinhalt springen

Voice Agents JS Example (Gemini Basic Call Transfer)

Voice Agents JS Example - Gemini Basic Call Transfer

This example demonstrates a basic Voice Agents script that integrates Google Gemini with Vodia's JavaScript Voice Agents capabilities. The integration uses Gemini's bidirectional audio streaming WebSocket API to handle real-time voice conversations and intelligent call routing.

Gemini Setup:

Please refer to the documentation here for Gemini setup, including obtaining your API key and configuring the necessary permissions.

Scenario:

  • Establish a WebSocket connection directly to Gemini's bidirectional streaming API endpoint.
  • Configure the session with model parameters, voice settings, and system instructions.
  • Stream PCM audio from the caller to Gemini in real-time.
  • Receive audio responses from Gemini and play them back to the caller.
  • Define function tools that Gemini can call for call routing (transfer_call).
  • Parse Gemini's function call responses to determine the appropriate call routing.
  • Handle error conditions and timeout scenarios with fallback transfers.
Note:

The model specified in this script is gemini-2.0-flash-exp. You must verify its current availability under your Google Cloud plan. Additionally, check for potential rate limit/cost impacts and identify a suitable alternative model should the version be inaccessible.

//
// Gemini integration
//
// (C) Vodia Networks 2025
//
// This file is property of Vodia Networks Inc. All rights reserved.
// For more information mail Vodia Networks Inc., info@vodia.com.
//
'use strict';
// ---- Configuration ----

// Gemini API key; it is appended to the WebSocket URL. Replace the placeholder.
var apiKey = "YOUR_GEMINI_API_KEY";

// Codec name passed to both call.stream() and call.play().
var codec = "pcm16";

// Prompt catalogue: prompt name -> { language code -> text to speak }.
var texts = {};
texts.initial = { en: "How may I help you today?" };

/**
 * Look up a prompt from the `texts` catalogue in the caller's language.
 *
 * @param {string} name - Key of the prompt in `texts` (e.g. "initial").
 * @returns {string|undefined} The text stored under `call.lang` when the
 *   prompt has one, otherwise the prompt's English ("en") text. Returns
 *   undefined when the prompt name is unknown or has no English text.
 */
function text(name) {
  var hasOwn = Object.prototype.hasOwnProperty;

  // Unknown prompt name: applying `in` to undefined would throw a TypeError,
  // so report the problem and return nothing instead.
  if (!hasOwn.call(texts, name) || !texts[name]) {
    console.log("!!! Unknown prompt name: " + name);
    return undefined;
  }

  var prompt = texts[name];
  // Own-property check: inherited keys such as "toString" must never be
  // mistaken for a language code.
  if (hasOwn.call(prompt, call.lang)) {
    return prompt[call.lang];
  }
  return prompt["en"];
}

// Greet the caller before the connection to Gemini is started.
call.say({text: text("initial")});

// Safety net: if no transfer has happened within 120 seconds, send the call
// to the default extension 700.
var timer = setTimeout(function() {
  // The timer has fired; drop the stale handle so later `if (timer)` checks
  // do not try to clear it.
  timer = null;

  // A transfer that is already scheduled keeps priority over this fallback.
  if (transferInitiated) {
    return;
  }
  // Mark the call as handled so a late tool call from Gemini cannot start a
  // second transfer after this one.
  transferInitiated = true;

  console.log("TIMEOUT: No transfer in 120 seconds, transferring to 700");
  call.transfer('700');
}, 120000);

// WebSocket to Gemini's BidiGenerateContent endpoint; the API key is sent as
// the `key` query parameter.
var ws = new Websocket("wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent?key=".concat(apiKey));

// Header list attached to the socket.
ws.header([
  {
    name: "Content-Type",
    value: "application/json"
  }
]);

// When the socket opens, send the session setup message: model, audio
// response modality, voice, system instruction and the transfer_call tool.
ws.on('open', function() {
  console.log("=== Gemini WebSocket OPENED at " + new Date().toISOString() + " ===");

  // Audio responses using the prebuilt "Puck" voice.
  var generationConfig = {
    "response_modalities": ["AUDIO"],
    "speech_config": {
      "voice_config": {
        "prebuilt_voice_config": { "voice_name": "Puck" }
      }
    }
  };

  // Persona, extension directory and the "speak first, then transfer" rule.
  var systemInstruction = {
    "parts": [{
      "text": "You are a friendly, professional phone assistant for Vodia Networks. Your goal is to help callers reach the right person. \n\nWhen a caller asks for a department, respond naturally (e.g., 'Sure thing, I'll connect you to Sales now' or 'One moment while I get Bob for you'). \n\nExtensions:\n- Sales: 502\n- Support: 501\n- Marketing: 503\n- Bob: 451\n- Default/Other: 700\n\nIMPORTANT: You must say your verbal confirmation completely before triggering the transfer_call tool."
    }]
  };

  // The single function Gemini may call; handled by handleTransfer().
  var transferCallDeclaration = {
    "name": "transfer_call",
    "description": "Transfers the call to an extension",
    "parameters": {
      "type": "object",
      "properties": {
        "destination": {
          "type": "string",
          "description": "Extension number like '501' or '502'"
        }
      },
      "required": ["destination"]
    }
  };

  // Key order is kept as-is so the serialized message is unchanged.
  var setupMessage = {
    "setup": {
      "model": "models/gemini-2.0-flash-exp",
      "generation_config": generationConfig,
      "system_instruction": systemInstruction,
      "tools": [{ "function_declarations": [transferCallDeclaration] }]
    }
  };

  console.log("Sending setup to Gemini...");
  ws.send(JSON.stringify(setupMessage));
});

// Socket-level failure: apologise to the caller and fall back to the default
// extension 700.
ws.on('error', function(error) {
  console.log("!!! WebSocket ERROR: " + error);

  // handleTransfer() sets this flag once a transfer is scheduled (it fires
  // after 2.5 s). The 2 s fallback below would beat it and override the
  // destination Gemini chose, so do nothing in that case. The flag also stops
  // several errors in a row from queueing several transfers.
  if (transferInitiated) {
    return;
  }
  transferInitiated = true;

  // The 120 s watchdog is redundant once the fallback transfer is scheduled.
  if (timer) {
    clearTimeout(timer);
    timer = null;
  }

  call.say({text: "I'm experiencing technical difficulties. Transferring you now."});
  // Wait 2 seconds so the announcement can be heard before transferring.
  setTimeout(function() { call.transfer('700'); }, 2000);
});

// Connection to Gemini closed: log the close code and reason, then call
// call.stream() without arguments (presumably this stops the caller-audio
// stream started on setupComplete; confirm against the Vodia API).
ws.on('close', function(code, reason) {
  var details = "code=" + code + ", reason=" + reason;
  console.log("=== Gemini WebSocket CLOSED: " + details + " ===");
  call.stream();
});

// Per-call state used by the message handler and handleTransfer().

// Set once a transfer has been scheduled; later requests are ignored.
var transferInitiated = false;

// Set when the first audio part from Gemini arrives (used for one-time logging).
var audioReceived = false;

// Running count of messages received from Gemini (used in log lines).
var messageCount = 0;

// Dispatch every message received from Gemini:
//   - error         -> apologise and fall back to extension 700
//   - setupComplete -> start streaming caller audio to the `stream` callback
//   - serverContent -> log text parts, play audio parts, act on function calls
//   - toolCall      -> pass each function call to handleTransfer()
ws.on('message', function(message) {
  messageCount++;

  var msg;
  try {
    msg = JSON.parse(message);
  } catch (e) {
    // A frame that is not valid JSON must not abort the handler with an
    // uncaught exception; log it and wait for the next message.
    console.log("!!! [MSG #" + messageCount + "] Ignoring unparsable message: " + e);
    return;
  }
  if (!msg || typeof msg !== "object") {
    console.log("!!! [MSG #" + messageCount + "] Ignoring non-object message");
    return;
  }

  var msgType = "UNKNOWN";
  if (msg.setupComplete) {
    msgType = "SETUP_COMPLETE";
  } else if (msg.serverContent) {
    msgType = "SERVER_CONTENT";
  } else if (msg.toolCall) {
    msgType = "TOOL_CALL";
  } else if (msg.error) {
    msgType = "ERROR";
  }

  console.log("[MSG #" + messageCount + "] Type: " + msgType);

  if (msg.error) {
    console.log("!!! GEMINI ERROR: " + JSON.stringify(msg.error));

    // Do not override a transfer that handleTransfer() already scheduled, and
    // do not queue a second fallback if several errors arrive.
    if (transferInitiated) {
      return;
    }
    transferInitiated = true;

    // The 120 s watchdog is redundant once the fallback is scheduled.
    if (timer) {
      clearTimeout(timer);
      timer = null;
    }

    call.say({text: "Sorry, I'm having trouble. Let me transfer you."});
    // Wait 2 seconds so the announcement can be heard before transferring.
    setTimeout(function() { call.transfer('700'); }, 2000);
    return;
  }

  if (msg.setupComplete) {
    console.log(">>> Starting audio stream");
    call.stream({
      codec: codec,
      interval: 0.5,
      samplerate: 16000,
      callback: stream
    });
  }
  else if (msg.serverContent) {
    var modelTurn = msg.serverContent.modelTurn;
    // serverContent without a modelTurn, or a modelTurn without parts,
    // carries nothing to play.
    var parts = (modelTurn && modelTurn.parts) || [];

    for (var i = 0; i < parts.length; i++) {
      var part = parts[i];

      if (part.text) {
        console.log(">>> Gemini said: " + part.text);
      }

      // Only play inline data that is explicitly labelled as audio; a missing
      // mimeType must not throw.
      var inlineData = part.inlineData;
      if (inlineData && typeof inlineData.mimeType === "string" &&
          inlineData.mimeType.startsWith("audio/")) {
        if (!audioReceived) {
          console.log(">>> First audio received from Gemini");
          audioReceived = true;
        }
        var audio = fromBase64String(inlineData.data);
        call.play({
          direction: "out",
          codec: codec,
          audio: audio
        });
      }

      if (part.functionCall) {
        handleTransfer(part.functionCall);
      }
    }
  }
  else if (msg.toolCall) {
    console.log(">>> Tool call: " + JSON.stringify(msg.toolCall));

    var functionCalls = msg.toolCall.functionCalls || [];
    for (var j = 0; j < functionCalls.length; j++) {
      handleTransfer(functionCalls[j]);
    }
  }
});

/**
 * Act on a function call issued by Gemini.
 *
 * Only "transfer_call" is handled. The first such call marks the transfer as
 * initiated, cancels the 120 s watchdog timer, schedules the transfer 2.5 s
 * later (so Gemini's spoken confirmation can finish) and, when the call has
 * an id, sends a success toolResponse back to Gemini. Any call made after a
 * transfer was initiated is ignored.
 *
 * NOTE(review): the destination comes from the model and is not checked
 * against the extension list in the system prompt; consider a whitelist
 * before using this in production.
 *
 * @param {Object} functionCall - Function call object from Gemini; reads
 *   `name`, `id` and `args.destination`.
 */
function handleTransfer(functionCall) {
  if (transferInitiated) {
    console.log(">>> Transfer already initiated, ignoring duplicate");
    return;
  }

  if (!functionCall || functionCall.name !== "transfer_call") {
    console.log(">>> Ignoring unsupported function call: " + (functionCall && functionCall.name));
    return;
  }

  // `args` can be absent when the model omits its arguments; treat that as an
  // empty destination instead of throwing.
  var args = functionCall.args || {};
  var requested = args.destination;
  console.log(">>> TRANSFER REQUEST: " + requested);

  // Normalise to a trimmed string so numeric or padded values are usable.
  var destination = (requested === undefined || requested === null)
    ? ""
    : String(requested).trim();

  if (destination === "") {
    console.log("!!! Empty destination, using 700");
    destination = "700";
  }

  transferInitiated = true;

  // The watchdog fallback is no longer needed.
  if (timer) {
    clearTimeout(timer);
    timer = null;
  }

  console.log(">>> Waiting 2.5s for Gemini to finish speaking...");
  setTimeout(function() {
    console.log(">>> Executing transfer to: " + destination);
    call.mute();
    call.transfer(destination);
  }, 2500);

  // Acknowledge the tool call so Gemini knows it was accepted.
  if (functionCall.id) {
    ws.send(JSON.stringify({
      "toolResponse": {
        "functionResponses": [{
          "id": functionCall.id,
          "name": "transfer_call",
          "response": {
            "success": true
          }
        }]
      }
    }));
  }
}

// Number of caller-audio chunks forwarded to Gemini so far.
var streamCount = 0;

/**
 * Callback passed to call.stream(): wraps one chunk of caller audio in a
 * `realtimeInput` message and sends it to Gemini over the WebSocket.
 * Logs the first chunk and every 100th chunk.
 *
 * @param audio - Audio chunk; it is Base64-encoded with toBase64String() and
 *   its `length` is logged as the size in bytes.
 */
function stream(audio) {
  streamCount += 1;

  var isFirstChunk = (streamCount === 1);
  var isProgressMark = (streamCount % 100 === 0);

  if (isFirstChunk) {
    console.log(">>> First audio chunk sent to Gemini, size: " + audio.length + " bytes");
  }
  if (isProgressMark) {
    console.log(">>> Sent " + streamCount + " audio chunks to Gemini");
  }

  // 16 kHz PCM, matching the samplerate requested in call.stream().
  var chunk = {
    "mime_type": "audio/pcm;rate=16000",
    "data": toBase64String(audio)
  };
  var payload = { "realtimeInput": { "media_chunks": [chunk] } };

  ws.send(JSON.stringify(payload));
}

// Start the WebSocket connection last, after the open/error/close/message
// handlers above have all been registered.
ws.connect()

For more information on Vodia's JavaScript capabilities, refer to: Vodia Backend JavaScript Documentation