介绍
Amazon Polly 是一项将文本转换为逼真语音的服务。借助它,您可以创建会说话的应用程序,并构建全新类别的具备语音功能的产品。
根据业务需求,我们对 AWS SDK 进行了封装。由于只有知语(Zhiyu)支持普通话,所以默认使用知语进行朗读。
另外我们封装了朗读速度和音量的控制,定义了ProsodyRate和ProsodyVolume枚举类,其他请参考AWSPollyClient类中的方法。
添加pom依赖
<dependency><groupId>com.walltech</groupId><artifactId>walltech-awsclient</artifactId></dependency>
快速开始
import com.walltech.aws.polly.AWSPollyClient;import com.walltech.aws.util.AWSUtils;import org.junit.jupiter.api.Test;import software.amazon.awssdk.services.polly.PollyClient;import java.io.FileOutputStream;/*** @author mori* @date 2022/9/8*/public class PollyTest {private static final String SAMPLE = "早上好,请先扫描再称重, please scan first and then weigh,thank you, 澳大利亚, AP-123.";@Testpublic void testHello() {// 目前只有cn north west client可用PollyClient pollyClient = AWSUtils.getPollyCNNorthWestClient();try (FileOutputStream fileOutputStream = new FileOutputStream("./sample.mp3");) {// 合成mp3流 写入到outputAWSPollyClient.synthesizeMp3ByZhiyu(pollyClient, SAMPLE, fileOutputStream);} catch (Exception e) {e.printStackTrace();}}}
AWSPollyClient
import com.walltech.aws.polly.enums.ProsodyRate;import com.walltech.aws.polly.enums.ProsodyVolume;import com.walltech.aws.s3.AWSS3Client;import org.slf4j.Logger;import org.slf4j.LoggerFactory;import software.amazon.awssdk.services.polly.PollyClient;import software.amazon.awssdk.services.polly.model.OutputFormat;import software.amazon.awssdk.services.polly.model.SynthesizeSpeechRequest;import software.amazon.awssdk.services.polly.model.TextType;import software.amazon.awssdk.services.polly.model.VoiceId;import software.amazon.awssdk.utils.IoUtils;import java.io.InputStream;import java.io.OutputStream;/*** @author mori* @date 2022/9/9*/public class AWSPollyClient {private static final Logger logger = LoggerFactory.getLogger(AWSS3Client.class);public static void synthesizeMp3ByZhiyu(PollyClient polly, String text, OutputStream outputStream) {synthesizeMp3ByZhiyu(polly, ProsodyVolume.X_LOUD.toString(), ProsodyRate.X_FAST.toString(), text, outputStream);}public static void synthesizeMp3ByZhiyu(PollyClient polly, ProsodyVolume volume, ProsodyRate prosodyRate, String text, OutputStream outputStream) {synthesizeMp3ByZhiyu(polly, volume.toString(), prosodyRate.toString(), text, outputStream);}public static void synthesizeMp3ByZhiyu(PollyClient polly, String volume, String rate, String text, OutputStream outputStream) {try (InputStream inputStream = synthesizeMp3ByZhiyu(polly, volume, rate, text)) {if (inputStream != null) {IoUtils.copy(inputStream, outputStream);}} catch (Exception e) {logger.error("Synthesize mp3 failed. 
error: {}", e.getMessage());}}public static InputStream synthesizeMp3ByZhiyu(PollyClient polly, String text) {return synthesizeMp3ByZhiyu(polly, ProsodyVolume.X_LOUD.toString(), ProsodyRate.X_FAST.toString(), text);}public static InputStream synthesizeMp3ByZhiyu(PollyClient polly, ProsodyVolume volume, ProsodyRate rate, String text) {return synthesizeMp3ByZhiyu(polly, volume.toString(), rate.toString(), text);}public static InputStream synthesizeMp3ByZhiyu(PollyClient polly, String volume, String rate, String text) {text = "<speak><prosody volume=\"" + volume + "\" rate=\"" + rate + "\">" + text + " <break/></prosody></speak>";SynthesizeSpeechRequest synthReq = SynthesizeSpeechRequest.builder().text(text).textType(TextType.SSML).voiceId(VoiceId.ZHIYU).outputFormat(OutputFormat.MP3).build();try {return polly.synthesizeSpeech(synthReq);} catch (Exception e) {logger.error("Synthesize mp3 failed. error: {}", e.getMessage());}return null;}}
