[navigation: go to top, go to home]

Skip to content
This repository has been archived by the owner on Oct 19, 2023. It is now read-only.

Commit

Permalink
google-assistant-sdk/pushtotalk: update sample to v1alpha2
Browse files Browse the repository at this point in the history
- Converse -> Assist
- ConverseRequest -> AssistRequest
- ConverseResponse -> AssistResponse
- ConverseState -> DialogStateIn
- ConverseResult -> DialogStateOut
- InterimSpokenRequestTextList -> SpeechRecognitionResult

Bug: 69066657
Change-Id: I0c84c743529c122cc0b7445cfb66aef8eb7cbe4e
  • Loading branch information
proppy committed Nov 21, 2017
1 parent 9b0cb2d commit bded2ca
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 72 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,42 +16,39 @@

import logging

from google.assistant.embedded.v1alpha1 import embedded_assistant_pb2
from google.assistant.embedded.v1alpha2 import embedded_assistant_pb2


END_OF_UTTERANCE = embedded_assistant_pb2.ConverseResponse.END_OF_UTTERANCE


def log_assist_request_without_audio(assist_request):
    """Log an AssistRequest at DEBUG level, eliding the raw audio payload.

    Args:
      assist_request: embedded_assistant_pb2.AssistRequest to log.
        The message is not modified; it is copied before any field is
        cleared.
    """
    # Only pay the cost of copying the proto when DEBUG logging is enabled.
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        resp_copy = embedded_assistant_pb2.AssistRequest()
        resp_copy.CopyFrom(assist_request)
        if len(resp_copy.audio_in) > 0:
            size = len(resp_copy.audio_in)
            # Drop the audio bytes: logging raw audio would be large and
            # unreadable, so log only its size instead.
            resp_copy.ClearField('audio_in')
            logging.debug('AssistRequest: audio_in (%d bytes)',
                          size)
            return
        logging.debug('AssistRequest: %s', resp_copy)


def log_assist_response_without_audio(assist_response):
    """Log an AssistResponse at DEBUG level, eliding the raw audio payload.

    Args:
      assist_response: embedded_assistant_pb2.AssistResponse to log.
        The message is not modified; it is copied before any field is
        cleared.
    """
    # Only pay the cost of copying the proto when DEBUG logging is enabled.
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        resp_copy = embedded_assistant_pb2.AssistResponse()
        resp_copy.CopyFrom(assist_response)
        has_audio_data = (resp_copy.HasField('audio_out') and
                          len(resp_copy.audio_out.audio_data) > 0)
        if has_audio_data:
            size = len(resp_copy.audio_out.audio_data)
            # Drop the audio bytes and log only their size; keep any other
            # populated audio_out fields in the logged message.
            resp_copy.audio_out.ClearField('audio_data')
            if resp_copy.audio_out.ListFields():
                logging.debug('AssistResponse: %s audio_data (%d bytes)',
                              resp_copy,
                              size)
            else:
                logging.debug('AssistResponse: audio_data (%d bytes)',
                              size)
            return
        logging.debug('AssistResponse: %s', resp_copy)
109 changes: 54 additions & 55 deletions google-assistant-sdk/googlesamples/assistant/grpc/pushtotalk.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,10 @@
import google.auth.transport.requests
import google.oauth2.credentials

from google.assistant.embedded.v1alpha1 import (
from google.assistant.embedded.v1alpha2 import (
embedded_assistant_pb2,
embedded_assistant_pb2_grpc
)
from google.rpc import code_pb2
from tenacity import retry, stop_after_attempt, retry_if_exception

try:
Expand All @@ -47,9 +46,9 @@


ASSISTANT_API_ENDPOINT = 'embeddedassistant.googleapis.com'
END_OF_UTTERANCE = embedded_assistant_pb2.ConverseResponse.END_OF_UTTERANCE
DIALOG_FOLLOW_ON = embedded_assistant_pb2.ConverseResult.DIALOG_FOLLOW_ON
CLOSE_MICROPHONE = embedded_assistant_pb2.ConverseResult.CLOSE_MICROPHONE
END_OF_UTTERANCE = embedded_assistant_pb2.AssistResponse.END_OF_UTTERANCE
DIALOG_FOLLOW_ON = embedded_assistant_pb2.DialogStateOut.DIALOG_FOLLOW_ON
CLOSE_MICROPHONE = embedded_assistant_pb2.DialogStateOut.CLOSE_MICROPHONE
DEFAULT_GRPC_DEADLINE = 60 * 3 + 5


Expand All @@ -67,16 +66,18 @@ class SampleAssistant(object):
device_handler: callback for device actions.
"""

def __init__(self, device_model_id, device_id, conversation_stream,
def __init__(self, language_code, device_model_id, device_id,
conversation_stream,
channel, deadline_sec, device_handler):
self.language_code = language_code
self.device_model_id = device_model_id
self.device_id = device_id
self.conversation_stream = conversation_stream

# Opaque blob provided in ConverseResponse that,
# when provided in a follow-up ConverseRequest,
# Opaque blob provided in AssistResponse that,
# when provided in a follow-up AssistRequest,
# gives the Assistant a context marker within the current state
# of the multi-Converse()-RPC "conversation".
# of the multi-Assist()-RPC "conversation".
# This value, along with MicrophoneMode, supports a more natural
# "conversation" with the Assistant.
self.conversation_state = None
Expand Down Expand Up @@ -106,7 +107,7 @@ def is_grpc_error_unavailable(e):

@retry(reraise=True, stop=stop_after_attempt(3),
retry=retry_if_exception(is_grpc_error_unavailable))
def converse(self):
def assist(self):
"""Send a voice request to the Assistant and playback the response.
Returns: True if conversation should continue.
Expand All @@ -117,46 +118,39 @@ def converse(self):
self.conversation_stream.start_recording()
logging.info('Recording audio request.')

def iter_converse_requests():
for c in self.gen_converse_requests():
assistant_helpers.log_converse_request_without_audio(c)
def iter_assist_requests():
for c in self.gen_assist_requests():
assistant_helpers.log_assist_request_without_audio(c)
yield c
self.conversation_stream.start_playback()

# This generator yields ConverseResponse proto messages
# This generator yields AssistResponse proto messages
# received from the gRPC Google Assistant API.
for resp in self.assistant.Converse(iter_converse_requests(),
self.deadline):
assistant_helpers.log_converse_response_without_audio(resp)
if resp.error.code != code_pb2.OK:
logging.error('server error: %s', resp.error.message)
break
for resp in self.assistant.Assist(iter_assist_requests(),
self.deadline):
assistant_helpers.log_assist_response_without_audio(resp)
if resp.event_type == END_OF_UTTERANCE:
logging.info('End of audio request detected')
self.conversation_stream.stop_recording()
if resp.result.spoken_request_text:
if resp.speech_results:
logging.info('Transcript of user request: "%s".',
resp.result.spoken_request_text)
' '.join(r.transcript
for r in resp.speech_results))
logging.info('Playing assistant response.')
if len(resp.audio_out.audio_data) > 0:
self.conversation_stream.write(resp.audio_out.audio_data)
if resp.result.spoken_response_text:
logging.info(
'Transcript of TTS response '
'(only populated from IFTTT): "%s".',
resp.result.spoken_response_text)
if resp.result.conversation_state:
self.conversation_state = resp.result.conversation_state
if resp.result.volume_percentage != 0:
logging.info('Setting volume to %s%%',
resp.result.volume_percentage)
self.conversation_stream.volume_percentage = (
resp.result.volume_percentage
)
if resp.result.microphone_mode == DIALOG_FOLLOW_ON:
if resp.dialog_state_out.conversation_state:
conversation_state = resp.dialog_state_out.conversation_state
logging.debug('Updating conversation state.')
self.conversation_state = conversation_state
if resp.dialog_state_out.volume_percentage != 0:
volume_percentage = resp.dialog_state_out.volume_percentage
logging.info('Setting volume to %s%%', volume_percentage)
self.conversation_stream.volume_percentage = volume_percentage
if resp.dialog_state_out.microphone_mode == DIALOG_FOLLOW_ON:
continue_conversation = True
logging.info('Expecting follow-on query from user.')
elif resp.result.microphone_mode == CLOSE_MICROPHONE:
elif resp.dialog_state_out.microphone_mode == CLOSE_MICROPHONE:
continue_conversation = False
if resp.device_action.device_request_json:
device_request = json.loads(
Expand All @@ -174,17 +168,17 @@ def iter_converse_requests():
self.conversation_stream.stop_playback()
return continue_conversation

def gen_converse_requests(self):
"""Yields: ConverseRequest messages to send to the API."""
def gen_assist_requests(self):
"""Yields: AssistRequest messages to send to the API."""

converse_state = None
if self.conversation_state:
logging.debug('Sending converse_state: %s',
self.conversation_state)
converse_state = embedded_assistant_pb2.ConverseState(
conversation_state=self.conversation_state,
dialog_state_in = embedded_assistant_pb2.DialogStateIn(
language_code=self.language_code,
conversation_state=b''
)
config = embedded_assistant_pb2.ConverseConfig(
if self.conversation_state:
logging.debug('Sending conversation state.')
dialog_state_in.conversation_state = self.conversation_state
config = embedded_assistant_pb2.AssistConfig(
audio_in_config=embedded_assistant_pb2.AudioInConfig(
encoding='LINEAR16',
sample_rate_hertz=self.conversation_stream.sample_rate,
Expand All @@ -194,18 +188,18 @@ def gen_converse_requests(self):
sample_rate_hertz=self.conversation_stream.sample_rate,
volume_percentage=self.conversation_stream.volume_percentage,
),
converse_state=converse_state,
dialog_state_in=dialog_state_in,
device_config=embedded_assistant_pb2.DeviceConfig(
device_model_id=self.device_model_id,
device_id=self.device_id,
device_model_id=self.device_model_id,
)
)
# The first ConverseRequest must contain the ConverseConfig
# The first AssistRequest must contain the AssistConfig
# and no audio data.
yield embedded_assistant_pb2.ConverseRequest(config=config)
yield embedded_assistant_pb2.AssistRequest(config=config)
for data in self.conversation_stream:
# Subsequent requests need audio data, but not config.
yield embedded_assistant_pb2.ConverseRequest(audio_in=data)
yield embedded_assistant_pb2.AssistRequest(audio_in=data)


@click.command()
Expand Down Expand Up @@ -237,6 +231,10 @@ def gen_converse_requests(self):
click.get_app_dir('googlesamples-assistant'),
'device_config.json'),
help='Path to save and restore the device configuration')
@click.option('--lang', show_default=True,
metavar='<language code>',
default='en-US',
help='Language code of the Assistant')
@click.option('--verbose', '-v', is_flag=True, default=False,
help='Verbose logging.')
@click.option('--input-audio-file', '-i',
Expand Down Expand Up @@ -275,7 +273,7 @@ def gen_converse_requests(self):
@click.option('--once', default=False, is_flag=True,
help='Force termination after a single conversation.')
def main(api_endpoint, credentials, project,
device_model_id, device_id, device_config, verbose,
device_model_id, device_id, device_config, lang, verbose,
input_audio_file, output_audio_file,
audio_sample_rate, audio_sample_width,
audio_iter_size, audio_block_size, audio_flush_size,
Expand Down Expand Up @@ -398,13 +396,14 @@ def onoff(on):
with open(device_config, 'w') as f:
json.dump(payload, f)

with SampleAssistant(device_model_id, device_id, conversation_stream,
with SampleAssistant(lang, device_model_id, device_id,
conversation_stream,
grpc_channel, grpc_deadline,
device_handler) as assistant:
# If file arguments are supplied:
# exit after the first turn of the conversation.
if input_audio_file or output_audio_file:
assistant.converse()
assistant.assist()
return

# If no file arguments supplied:
Expand All @@ -415,7 +414,7 @@ def onoff(on):
while True:
if wait_for_user_trigger:
click.pause(info='Press Enter to send a new request...')
continue_conversation = assistant.converse()
continue_conversation = assistant.assist()
# wait for user trigger if there is no follow-up turn in
# the conversation.
wait_for_user_trigger = not continue_conversation
Expand Down

0 comments on commit bded2ca

Please sign in to comment.