1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16package android.speech.tts; 17 18import android.annotation.NonNull; 19import android.media.AudioFormat; 20import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher; 21import android.util.Log; 22 23import java.io.IOException; 24import java.nio.ByteBuffer; 25import java.nio.ByteOrder; 26import java.nio.channels.FileChannel; 27 28/** 29 * Speech synthesis request that writes the audio to a WAV file. 30 */ 31class FileSynthesisCallback extends AbstractSynthesisCallback { 32 33 private static final String TAG = "FileSynthesisRequest"; 34 private static final boolean DBG = false; 35 36 private static final int MAX_AUDIO_BUFFER_SIZE = 8192; 37 38 private static final int WAV_HEADER_LENGTH = 44; 39 private static final short WAV_FORMAT_PCM = 0x0001; 40 41 private final Object mStateLock = new Object(); 42 43 private int mSampleRateInHz; 44 private int mAudioFormat; 45 private int mChannelCount; 46 47 private FileChannel mFileChannel; 48 49 private final UtteranceProgressDispatcher mDispatcher; 50 51 private boolean mStarted = false; 52 private boolean mDone = false; 53 54 /** Status code of synthesis */ 55 protected int mStatusCode; 56 57 FileSynthesisCallback(@NonNull FileChannel fileChannel, 58 @NonNull UtteranceProgressDispatcher dispatcher, boolean clientIsUsingV2) { 59 super(clientIsUsingV2); 60 mFileChannel = fileChannel; 61 mDispatcher = dispatcher; 62 mStatusCode = TextToSpeech.SUCCESS; 63 } 64 65 @Override 66 void stop() { 67 synchronized (mStateLock) { 68 if (mDone) { 69 return; 70 } 71 if (mStatusCode == TextToSpeech.STOPPED) { 72 return; 73 } 74 75 mStatusCode = TextToSpeech.STOPPED; 76 cleanUp(); 77 mDispatcher.dispatchOnStop(); 78 } 79 } 80 81 /** 82 * Must be called while holding the monitor on {@link #mStateLock}. 83 */ 84 private void cleanUp() { 85 closeFile(); 86 } 87 88 /** 89 * Must be called while holding the monitor on {@link #mStateLock}. 90 */ 91 private void closeFile() { 92 // File will be closed by the SpeechItem in the speech service. 93 mFileChannel = null; 94 } 95 96 @Override 97 public int getMaxBufferSize() { 98 return MAX_AUDIO_BUFFER_SIZE; 99 } 100 101 @Override 102 public int start(int sampleRateInHz, int audioFormat, int channelCount) { 103 if (DBG) { 104 Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat 105 + "," + channelCount + ")"); 106 } 107 if (audioFormat != AudioFormat.ENCODING_PCM_8BIT && 108 audioFormat != AudioFormat.ENCODING_PCM_16BIT && 109 audioFormat != AudioFormat.ENCODING_PCM_FLOAT) { 110 Log.e(TAG, "Audio format encoding " + audioFormat + " not supported. Please use one " + 111 "of AudioFormat.ENCODING_PCM_8BIT, AudioFormat.ENCODING_PCM_16BIT or " + 112 "AudioFormat.ENCODING_PCM_FLOAT"); 113 } 114 mDispatcher.dispatchOnBeginSynthesis(sampleRateInHz, audioFormat, channelCount); 115 116 FileChannel fileChannel = null; 117 synchronized (mStateLock) { 118 if (mStatusCode == TextToSpeech.STOPPED) { 119 if (DBG) Log.d(TAG, "Request has been aborted."); 120 return errorCodeOnStop(); 121 } 122 if (mStatusCode != TextToSpeech.SUCCESS) { 123 if (DBG) Log.d(TAG, "Error was raised"); 124 return TextToSpeech.ERROR; 125 } 126 if (mStarted) { 127 Log.e(TAG, "Start called twice"); 128 return TextToSpeech.ERROR; 129 } 130 mStarted = true; 131 mSampleRateInHz = sampleRateInHz; 132 mAudioFormat = audioFormat; 133 mChannelCount = channelCount; 134 135 mDispatcher.dispatchOnStart(); 136 fileChannel = mFileChannel; 137 } 138 139 try { 140 fileChannel.write(ByteBuffer.allocate(WAV_HEADER_LENGTH)); 141 return TextToSpeech.SUCCESS; 142 } catch (IOException ex) { 143 Log.e(TAG, "Failed to write wav header to output file descriptor", ex); 144 synchronized (mStateLock) { 145 cleanUp(); 146 mStatusCode = TextToSpeech.ERROR_OUTPUT; 147 } 148 return TextToSpeech.ERROR; 149 } 150 } 151 152 @Override 153 public int audioAvailable(byte[] buffer, int offset, int length) { 154 if (DBG) { 155 Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + buffer + "," + offset 156 + "," + length + ")"); 157 } 158 FileChannel fileChannel = null; 159 synchronized (mStateLock) { 160 if (mStatusCode == TextToSpeech.STOPPED) { 161 if (DBG) Log.d(TAG, "Request has been aborted."); 162 return errorCodeOnStop(); 163 } 164 if (mStatusCode != TextToSpeech.SUCCESS) { 165 if (DBG) Log.d(TAG, "Error was raised"); 166 return TextToSpeech.ERROR; 167 } 168 if (mFileChannel == null) { 169 Log.e(TAG, "File not open"); 170 mStatusCode = TextToSpeech.ERROR_OUTPUT; 171 return TextToSpeech.ERROR; 172 } 173 if (!mStarted) { 174 Log.e(TAG, "Start method was not called"); 175 return TextToSpeech.ERROR; 176 } 177 fileChannel = mFileChannel; 178 } 179 180 final byte[] bufferCopy = new byte[length]; 181 System.arraycopy(buffer, offset, bufferCopy, 0, length); 182 mDispatcher.dispatchOnAudioAvailable(bufferCopy); 183 184 try { 185 fileChannel.write(ByteBuffer.wrap(buffer, offset, length)); 186 return TextToSpeech.SUCCESS; 187 } catch (IOException ex) { 188 Log.e(TAG, "Failed to write to output file descriptor", ex); 189 synchronized (mStateLock) { 190 cleanUp(); 191 mStatusCode = TextToSpeech.ERROR_OUTPUT; 192 } 193 return TextToSpeech.ERROR; 194 } 195 } 196 197 @Override 198 public int done() { 199 if (DBG) Log.d(TAG, "FileSynthesisRequest.done()"); 200 FileChannel fileChannel = null; 201 202 int sampleRateInHz = 0; 203 int audioFormat = 0; 204 int channelCount = 0; 205 206 synchronized (mStateLock) { 207 if (mDone) { 208 Log.w(TAG, "Duplicate call to done()"); 209 // This is not an error that would prevent synthesis. Hence no 210 // setStatusCode is set. 211 return TextToSpeech.ERROR; 212 } 213 if (mStatusCode == TextToSpeech.STOPPED) { 214 if (DBG) Log.d(TAG, "Request has been aborted."); 215 return errorCodeOnStop(); 216 } 217 if (mStatusCode != TextToSpeech.SUCCESS && mStatusCode != TextToSpeech.STOPPED) { 218 mDispatcher.dispatchOnError(mStatusCode); 219 return TextToSpeech.ERROR; 220 } 221 if (mFileChannel == null) { 222 Log.e(TAG, "File not open"); 223 return TextToSpeech.ERROR; 224 } 225 mDone = true; 226 fileChannel = mFileChannel; 227 sampleRateInHz = mSampleRateInHz; 228 audioFormat = mAudioFormat; 229 channelCount = mChannelCount; 230 } 231 232 try { 233 // Write WAV header at start of file 234 fileChannel.position(0); 235 int dataLength = (int) (fileChannel.size() - WAV_HEADER_LENGTH); 236 fileChannel.write( 237 makeWavHeader(sampleRateInHz, audioFormat, channelCount, dataLength)); 238 239 synchronized (mStateLock) { 240 closeFile(); 241 mDispatcher.dispatchOnSuccess(); 242 return TextToSpeech.SUCCESS; 243 } 244 } catch (IOException ex) { 245 Log.e(TAG, "Failed to write to output file descriptor", ex); 246 synchronized (mStateLock) { 247 cleanUp(); 248 } 249 return TextToSpeech.ERROR; 250 } 251 } 252 253 @Override 254 public void error() { 255 error(TextToSpeech.ERROR_SYNTHESIS); 256 } 257 258 @Override 259 public void error(int errorCode) { 260 if (DBG) Log.d(TAG, "FileSynthesisRequest.error()"); 261 synchronized (mStateLock) { 262 if (mDone) { 263 return; 264 } 265 cleanUp(); 266 mStatusCode = errorCode; 267 } 268 } 269 270 @Override 271 public boolean hasStarted() { 272 synchronized (mStateLock) { 273 return mStarted; 274 } 275 } 276 277 @Override 278 public boolean hasFinished() { 279 synchronized (mStateLock) { 280 return mDone; 281 } 282 } 283 284 private ByteBuffer makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount, 285 int dataLength) { 286 int sampleSizeInBytes = AudioFormat.getBytesPerSample(audioFormat); 287 int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount; 288 short blockAlign = (short) (sampleSizeInBytes * channelCount); 289 short bitsPerSample = (short) (sampleSizeInBytes * 8); 290 291 byte[] headerBuf = new byte[WAV_HEADER_LENGTH]; 292 ByteBuffer header = ByteBuffer.wrap(headerBuf); 293 header.order(ByteOrder.LITTLE_ENDIAN); 294 295 header.put(new byte[]{ 'R', 'I', 'F', 'F' }); 296 header.putInt(dataLength + WAV_HEADER_LENGTH - 8); // RIFF chunk size 297 header.put(new byte[]{ 'W', 'A', 'V', 'E' }); 298 header.put(new byte[]{ 'f', 'm', 't', ' ' }); 299 header.putInt(16); // size of fmt chunk 300 header.putShort(WAV_FORMAT_PCM); 301 header.putShort((short) channelCount); 302 header.putInt(sampleRateInHz); 303 header.putInt(byteRate); 304 header.putShort(blockAlign); 305 header.putShort(bitsPerSample); 306 header.put(new byte[]{ 'd', 'a', 't', 'a' }); 307 header.putInt(dataLength); 308 header.flip(); 309 310 return header; 311 } 312} 313