第一句子网 > ROS 教程3 机器人语音语音识别理解合成控制 ASR NLU TTS

ROS 教程3 机器人语音语音识别理解合成控制 ASR NLU TTS

时间：2021-08-13 17:05:51

机器人语音语音识别理解合成控制 ASR NLU TTS

github

一、语音处理总体框架

1. 语音识别（ASR ， Automatic Speech Recognition ）2. 语义理解（NLU ， Natural Language Understanding）e. 语音合成（TTS ， Text To Speech）1. 语音识别 **ASR**：支持的包：国外：CMU SPhinx ——> pocketsphinx国内：科大迅飞等。。2. 语义理解 NLU：图灵3. 语音合成 TTS：国外：Festival ——> sound_play 是 ros-indigo-audio-common 的一部分国内：科大迅飞等。。。

二、国外库

1、语音识别 pocketsphinx

1) 安装sudo apt-get install gstreamer0.10-pocketsphinx # 原生系统sudo apt-get install ros-indigo-pocketsphinx# ros接口支持sudo apt-get install ros-indigo-audio-common# 包含了sound_play TTSsudo apt-get install libasound2 # 语音驱动sudo apt-get install gstreamer0.10-gconf# GStreamer组件2) 测试pocketsphinx包包含了一个节点 recognizer.py获取硬件语音的输入流，在已有的语音库里匹配语音相应的单词并发布到 /recognizer/output 话题适合robotcup的一个语音库测试：roslaunch pocketsphinx robocup.launch说话测试显示话题消息：rostopic echo /recognizer/output查看语音库：roscd pocketsphinx/demomore robocup.corpus显示只有说了语音库内的语音才能得到较满意的结果

# robocup.launch<launch><node name="recognizer" pkg="pocketsphinx" type="recognizer.py" output="screen"> # 识别器<param name="lm" value="$(find pocketsphinx)/demo/robocup.lm"/># 语言模型在线工具根据语言库生成<param name="dict" value="$(find pocketsphinx)/demo/robocup.dic"/> # 语言词典</node></launch>

# recognizer.py 文件import roslib; roslib.load_manifest('pocketsphinx')import rospyimport pygtkpygtk.require('2.0')import gtkimport gobjectimport pygstpygst.require('0.10')gobject.threads_init()import gstfrom std_msgs.msg import Stringfrom std_srvs.srv import *import osimport commandsclass recognizer(object):""" GStreamer based speech recognizer. """def __init__(self):# Start noderospy.init_node("recognizer")self._device_name_param = "~mic_name" # Find the name of your microphone by typing pacmd list-sources in the terminalself._lm_param = "~lm"self._dic_param = "~dict"# Configure mics with gstreamer launch configif rospy.has_param(self._device_name_param):self.device_name = rospy.get_param(self._device_name_param)self.device_index = self.pulse_index_from_name(self.device_name)self.launch_config = "pulsesrc device=" + str(self.device_index)rospy.loginfo("Using: pulsesrc device=%s name=%s", self.device_index, self.device_name)elif rospy.has_param('~source'):# common sources: 'alsasrc'self.launch_config = rospy.get_param('~source')else:self.launch_config = 'gconfaudiosrc'rospy.loginfo("Launch config: %s", self.launch_config)self.launch_config += " ! audioconvert ! audioresample " \+ '! vader name=vad auto-threshold=true ' \+ '! pocketsphinx name=asr ! fakesink'# Configure ROS settingsself.started = Falserospy.on_shutdown(self.shutdown)self.pub = rospy.Publisher('~output', String)rospy.Service("~start", Empty, self.start)rospy.Service("~stop", Empty, self.stop)if rospy.has_param(self._lm_param) and rospy.has_param(self._dic_param):self.start_recognizer()else:rospy.logwarn("lm and dic parameters need to be set to start recognizer.")def start_recognizer(self):rospy.loginfo("Starting recognizer... ")self.pipeline = gst.parse_launch(self.launch_config)self.asr = self.pipeline.get_by_name('asr')self.asr.connect('partial_result', self.asr_partial_result)self.asr.connect('result', self.asr_result)self.asr.set_property('configured', True)self.asr.set_property('dsratio', 1)# Configure language modelif rospy.has_param(self._lm_param):lm = rospy.get_param(self._lm_param)else:rospy.logerr('Recognizer not started. Please specify a language model file.')returnif rospy.has_param(self._dic_param):dic = rospy.get_param(self._dic_param)else:rospy.logerr('Recognizer not started. Please specify a dictionary.')returnself.asr.set_property('lm', lm)self.asr.set_property('dict', dic)self.bus = self.pipeline.get_bus()self.bus.add_signal_watch()self.bus_id = self.bus.connect('message::application', self.application_message)self.pipeline.set_state(gst.STATE_PLAYING)self.started = Truedef pulse_index_from_name(self, name):output = commands.getstatusoutput("pacmd list-sources | grep -B 1 'name: <" + name + ">' | grep -o -P '(?<=index: )[0-9]*'")if len(output) == 2:return output[1]else:raise Exception("Error. pulse index doesn't exist for name: " + name)def stop_recognizer(self):if self.started:self.pipeline.set_state(gst.STATE_NULL)self.pipeline.remove(self.asr)self.bus.disconnect(self.bus_id)self.started = Falsedef shutdown(self):""" Delete any remaining parameters so they don't affect next launch """for param in [self._device_name_param, self._lm_param, self._dic_param]:if rospy.has_param(param):rospy.delete_param(param)""" Shutdown the GTK thread. """gtk.main_quit()def start(self, req):self.start_recognizer()rospy.loginfo("recognizer started")return EmptyResponse()def stop(self, req):self.stop_recognizer()rospy.loginfo("recognizer stopped")return EmptyResponse()def asr_partial_result(self, asr, text, uttid):""" Forward partial result signals on the bus to the main thread. """struct = gst.Structure('partial_result')struct.set_value('hyp', text)struct.set_value('uttid', uttid)asr.post_message(gst.message_new_application(asr, struct))def asr_result(self, asr, text, uttid):""" Forward result signals on the bus to the main thread. """struct = gst.Structure('result')struct.set_value('hyp', text)struct.set_value('uttid', uttid)asr.post_message(gst.message_new_application(asr, struct))def application_message(self, bus, msg):""" Receive application messages from the bus. """msgtype = msg.structure.get_name()if msgtype == 'partial_result':self.partial_result(msg.structure['hyp'], msg.structure['uttid'])if msgtype == 'result':self.final_result(msg.structure['hyp'], msg.structure['uttid'])def partial_result(self, hyp, uttid):""" Delete any previous selection, insert text and select it. """rospy.logdebug("Partial: " + hyp)def final_result(self, hyp, uttid):""" Insert the final result. """msg = String()msg.data = str(hyp.lower())rospy.loginfo(msg.data)self.pub.publish(msg)if __name__ == "__main__":start = recognizer()gtk.main()

#!/usr/bin/python# -*- coding:utf-8 -*-### 修改后的文件import roslibroslib.load_manifest('pocketsphinx')import rospyimport pygtk # Python轻松创建具有图形用户界面的程序播放音乐等pygtk.require('2.0')import gtk # GNU Image Manipulation Program (GIMP) Toolkitimport gobject # 亦称Glib对象系统，是一个程序库，它可以帮助我们使用C语言编写面向对象程序import pygst # 与 pygtk 相关pygst.require('0.10')gobject.threads_init()# 初始化import gstfrom std_msgs.msg import Stringfrom std_srvs.srv import *import osimport commandsclass recognizer(object):"""GStreamer是一个多媒体框架，它可以允许你轻易地创建、编辑与播放多媒体文件"""# 初始化系统配置def __init__(self):# 创建节点rospy.init_node("recognizer")# 全局参数self._device_name_param = "~mic_name" # 麦克风self._lm_param = "~lm" # 语言模型 language model self._dic_param = "~dict" # 语言字典self._hmm_param = "~hmm"# 识别网络 hiden markov model 隐马尔可夫模型分中英文模型# 用 gstreamer launch config 配置麦克风一些启动信息if rospy.has_param(self._device_name_param):# 按照指定的麦克风self.device_name = rospy.get_param(self._device_name_param)# 麦克风名字self.device_index = self.pulse_index_from_name(self.device_name)# 麦克风编号 IDself.launch_config = "pulsesrc device=" + str(self.device_index)# 启动信息rospy.loginfo("Using: pulsesrc device=%s name=%s", self.device_index, self.device_name)elif rospy.has_param('~source'):# common sources: 'alsasrc'self.launch_config = rospy.get_param('~source')else:self.launch_config = 'gconfaudiosrc'rospy.loginfo("麦克风配置: %s", self.launch_config) # "Launch config: %s",self.launch_configself.launch_config += " ! audioconvert ! audioresample " \+ '! vader name=vad auto-threshold=true ' \+ '! pocketsphinx name=asr ! fakesink'# 配置ros系统设置self.started = Falserospy.on_shutdown(self.shutdown)# 自主关闭self.pub = rospy.Publisher('~output', String)# 发布 ~output 参数指定的话题类型 tring 似乎缺少指定发布队列大小 tringrospy.Service("~start", Empty, self.start) # 开始服务rospy.Service("~stop", Empty, self.stop)# 结束服务# 检查模型和字典配置if rospy.has_param(self._lm_param) and rospy.has_param(self._dic_param):self.start_recognizer()else:rospy.logwarn("启动语音识别器必须指定语言模型lm,以及语言字典dic.")# rospy.logwarn("lm and dic parameters need to be set to start recognizer.")def start_recognizer(self):rospy.loginfo("开始语音识别... ")# rospy.loginfo("Starting recognizer... ")self.pipeline = gst.parse_launch(self.launch_config)# 解析麦克风配置 self.asr = self.pipeline.get_by_name('asr') # 自动语音识别模型self.asr.connect('partial_result', self.asr_partial_result)# 后面的函数self.asr.connect('result', self.asr_result)#self.asr.set_property('configured', True) # 需要开启配置 hmm模型self.asr.set_property('dsratio', 1)# 配置语言模型if rospy.has_param(self._lm_param):lm = rospy.get_param(self._lm_param)else:rospy.logerr('请配置一个语言模型 lm.')returnif rospy.has_param(self._dic_param):dic = rospy.get_param(self._dic_param)else:rospy.logerr('请配置一个语言字典 dic.')returnif rospy.has_param(self._hmm_param):hmm = rospy.get_param(self._hmm_param)else:rospy.logerr('请配置一个语言识别模型 hmm.')returnself.asr.set_property('lm', lm) # 设置asr的语言模型self.asr.set_property('dict', dic)# 设置asr的字典self.asr.set_property('hmm', hmm) # 设置asr的识别模型self.bus = self.pipeline.get_bus()self.bus.add_signal_watch()self.bus_id = self.bus.connect('message::application', self.application_message)self.pipeline.set_state(gst.STATE_PLAYING)self.started = True# 解析麦克风名称得到麦克风IDdef pulse_index_from_name(self, name):output = commands.getstatusoutput("pacmd list-sources | grep -B 1 'name: <" + name + ">' | grep -o -P '(?<=index: )[0-9]*'")if len(output) == 2:return output[1]else:raise Exception("Error. pulse index doesn't exist for name: " + name)# 停止识别器def stop_recognizer(self):if self.started:self.pipeline.set_state(gst.STATE_NULL)self.pipeline.remove(self.asr)self.bus.disconnect(self.bus_id)self.started = False# 程序关闭def shutdown(self):""" 删除所有的参数，以防影响下次启动"""for param in [self._device_name_param, self._lm_param, self._dic_param]:if rospy.has_param(param):rospy.delete_param(param)""" 关闭 GTK 进程. """gtk.main_quit()# 开始def start(self, req):self.start_recognizer()rospy.loginfo("识别器启动")return EmptyResponse()# 停止def stop(self, req):self.stop_recognizer()rospy.loginfo("识别器停止")return EmptyResponse()def asr_partial_result(self, asr, text, uttid):"""前线部分结果到主线程. """struct = gst.Structure('partial_result')struct.set_value('hyp', text)struct.set_value('uttid', uttid)asr.post_message(gst.message_new_application(asr, struct))def asr_result(self, asr, text, uttid):""" 前线结果到主线程 """struct = gst.Structure('result')struct.set_value('hyp', text)struct.set_value('uttid', uttid)asr.post_message(gst.message_new_application(asr, struct))def application_message(self, bus, msg):""" 从总线上接收应用数据. """msgtype = msg.structure.get_name()if msgtype == 'partial_result':self.partial_result(msg.structure['hyp'], msg.structure['uttid'])if msgtype == 'result':self.final_result(msg.structure['hyp'], msg.structure['uttid'])# 部分结果def partial_result(self, hyp, uttid):""" Delete any previous selection, insert text and select it. """rospy.logdebug("Partial: " + hyp)# 最终结果def final_result(self, hyp, uttid):""" Insert the final result. """msg = String()# 话题消息类型msg.data = str(hyp)# 识别语音对于成的文字rospy.loginfo(msg.data)self.pub.publish(msg)if __name__ == "__main__":start = recognizer()gtk.main()

3) 创建新的语音单词a) 创建语音单词语句文件一行一句的 txt文件例如:roscd rbx1_speech/configmore nav_commands.txt

pause speechcontinue speechmove forwardmove backwardmove backmove leftmove right...

## 中文 voice_ctr.txt前进后退左转右转向左转向右转停止加速减速

b) 编译生成语音库通过在线的一个语言模型（lm）生成http://www.speech.cs.cmu.edu/tools/lmtool-new.html上传语言文件 Upload a sentence corpus file: Browse在线编译COMPILE KNOWLEDGE BASE下载编译好的文件使用 .dic 字典文件音节/音素字典文件.lm 语言默默文件出现的概率注意中文 .dic文件是空的需要自己生成使用别人生成好的比较全的.dic文件查找自己定义的单词的音节例如：cd ewenwan/catkin_ws/src/voice_system/model/lm/zh/zh_CN/grep 停止 mandarin_notone.dic

>>>停止t ing zh ib停止听写t ing zh ib t ing x ie停止录音t ing zh ib l u y in停止注水t ing zh ib zh u sh ui呼吸停止h u x i t ing zh ib自动停止z if d ong t ing zh ib

# 编写词典文件# voice_ctr.dic前进t ing zh ib后退h ou t ui左转z uo zh uan右转y uo zh uan向左转x iang z uo zh uan向右转x iang y uo zh uan停止t ing zh ib加速j ia s u减速j ian s u

c) 编写自己的launch启动文件

voice_nav_commands.launch

<launch><node name="recognizer" pkg="pocketsphinx" type="recognizer.py" output="screen"> #识别器<param name="lm" value="$(find rbx1_speech)/config/nav_commands.lm"/> #新的语言模型<param name="dict" value="$(find rbx1_speech)/config/nav_commands.dic"/> #新的语言单词库</node></launch>

my_voice_asr.launch

<launch><node name="recognizer" pkg="voice_system" type="my_recognizer.py" output="screen"> <param name="lm" value="$(find voice_system)/model/lm/zh/zh_MY/voice_ctr.lm"/><param name="dict" value="$(find voice_system)/model/lm/zh/zh_MY/voice_ctr.dic"/><param name="hmm" value="$(find voice_system)/model/hmm/zh/tdt_sc_8k"/></launch>

d) 运行测试roslaunch voice_system my_voice_asr.launchrostopic echo /recognizer/output4) 编写语音控制文件需要另外一个节点订阅 /recognizer/output话题得到上的文本控制命令根据文本控制命令，给出实际的机器人控制指令:如简单的发布到 cmd_vel话题上的 Twist移动指令，或者导航命令 move_base_msgs.msg 的 MoveBaseGoal 类型指令

# voice_cmd_vel.py 发布 cmd_vel/Twist 命令import roslib; roslib.load_manifest('pocketsphinx')import rospyimport mathfrom geometry_msgs.msg import Twist # cmd_vel/Twist from std_msgs.msg import Stringclass voice_cmd_vel:def __init__(self): # initializedrospy.on_shutdown(self.cleanup) # auto shutdown 清零速度self.speed = 0.2# 默认速度和速度标志 self.msg = Twist() # cmd_vel type# publish to cmd_vel, subscribe to speech outputself.pub_ = rospy.Publisher('cmd_vel', Twist) # 发布的命令话题# 类型回调函数rospy.Subscriber('recognizer/output', String, self.speechCb) # 订阅的识别后的文本控制命令r = rospy.Rate(10.0)while not rospy.is_shutdown():self.pub_.publish(self.msg) #发布控制命令r.sleep()def speechCb(self, msg):rospy.loginfo(msg.data) #终端显示文本命令if msg.data.find("full speed") > -1: #全速if self.speed == 0.2:self.msg.linear.x = self.msg.linear.x*2self.msg.angular.z = self.msg.angular.z*2self.speed = 0.4if msg.data.find("half speed") > -1: #半速if self.speed == 0.4:self.msg.linear.x = self.msg.linear.x/2self.msg.angular.z = self.msg.angular.z/2self.speed = 0.2if msg.data.find("forward") > -1: #直线前进self.msg.linear.x = self.speed #默认速度0.2 或0.4self.msg.angular.z = 0elif msg.data.find("left") > -1:#左转if self.msg.linear.x != 0:if self.msg.angular.z < self.speed:self.msg.angular.z += 0.05else: self.msg.angular.z = self.speed*2 #线速度为零左转为逆时针 antcolockwise 逆时针为正方向elif msg.data.find("right") > -1: #右转if self.msg.linear.x != 0:if self.msg.angular.z > -self.speed:self.msg.angular.z -= 0.05else: self.msg.angular.z = -self.speed*2 #线速度为零右转为顺时针 colockwise 顺时针为反方向elif msg.data.find("back") > -1: #后退self.msg.linear.x = -self.speed #线速度取反self.msg.angular.z = 0elif msg.data.find("stop") > -1 or msg.data.find("halt") > -1:#停止self.msg = Twist() #速度清零self.pub_.publish(self.msg) #发布速度命令def cleanup(self):# stop the robot!twist = Twist()self.pub_.publish(twist)if __name__=="__main__":rospy.init_node('voice_cmd_vel')try:voice_cmd_vel()except:pass

# voice_nav.py 语言导航还是 cmd_vel/Twist 消息import rospyfrom geometry_msgs.msg import Twist #cmd_vel/Twist 消息from std_msgs.msg import String#语音识别from math import copysign # 克隆符号copysign(x,y) 将 y的正负号给x copysign（1,-5）=-1class VoiceNav:#初始化def __init__(self):rospy.init_node('voice_nav') #创建节点 rospy.on_shutdown(self.cleanup) #自动退出 # Set a number of parameters affecting the robot's speed# 参数设置可以 rospara set修改或者 launch文件修改self.max_speed = rospy.get_param("~max_speed", 0.4)self.max_angular_speed = rospy.get_param("~max_angular_speed", 1.5)self.speed = rospy.get_param("~start_speed", 0.1)self.angular_speed = rospy.get_param("~start_angular_speed", 0.5)self.linear_increment = rospy.get_param("~linear_increment", 0.05)self.angular_increment = rospy.get_param("~angular_increment", 0.4)# 执行频率self.rate = rospy.get_param("~rate", 5)r = rospy.Rate(self.rate) # 是否停止语言控制的标志 self.paused = False # 初始化命令消息 Initialize the Twist message we will publish.self.cmd_vel = Twist()# 发布命令Publish the Twist message to the cmd_vel topicself.cmd_vel_pub = rospy.Publisher('cmd_vel', Twist, queue_size=5) # 订阅语音识别的话题获得文本命令 Subscribe to the /recognizer/output topic to receive voice commands.# 回调函数rospy.Subscriber('/recognizer/output', String, self.speech_callback)# 关键字到命令的映射字典 A mapping from keywords or phrases to commandsself.keywords_to_command = {'stop': ['stop', 'halt', 'abort', 'kill', 'panic', 'off', 'freeze', 'shut down', 'turn off', 'help', 'help me'],'slower': ['slow down', 'slower'],'faster': ['speed up', 'faster'],'forward': ['forward', 'ahead', 'straight'],'backward': ['back', 'backward', 'back up'],'rotate left': ['rotate left'],'rotate right': ['rotate right'],'turn left': ['turn left'],'turn right': ['turn right'],'quarter': ['quarter speed'],'half': ['half speed'],'full': ['full speed'],'pause': ['pause speech'],'continue': ['continue speech']}rospy.loginfo("Ready to receive voice commands")# We have to keep publishing the cmd_vel message if we want the robot to keep moving.while not rospy.is_shutdown():self.cmd_vel_pub.publish(self.cmd_vel) #发布速度命令r.sleep() def get_command(self, data):# Attempt to match the recognized word or phrase to the # keywords_to_command dictionary and return the appropriate# commandfor (command, keywords) in self.keywords_to_command.iteritems():for word in keywords:if data.find(word) > -1: #如果找到包含相应关键字的语言就返回对应的命令return commanddef speech_callback(self, msg):# Get the motion command from the recognized phrase 得到命令command = self.get_command(msg.data) # Log the command to the screenrospy.loginfo("Command: " + str(command)) # 终端回显 # If the user has asked to pause/continue voice control,# set the flag accordingly 语音控制标志if command == 'pause':self.paused = Trueelif command == 'continue':self.paused = False# If voice control is paused, simply return without# performing any actionif self.paused: #不使用语言控制则返回return # The list of if-then statements should be fairly# self-explanatoryif command == 'forward': self.cmd_vel.linear.x = self.speed #前进self.cmd_vel.angular.z = 0elif command == 'rotate left':self.cmd_vel.linear.x = 0self.cmd_vel.angular.z = self.angular_speed # 原地左转为正方向速度为正值elif command == 'rotate right': self.cmd_vel.linear.x = 0self.cmd_vel.angular.z = -self.angular_speed # 原地右转为负值elif command == 'turn left': #向左转if self.cmd_vel.linear.x != 0:self.cmd_vel.angular.z += self.angular_incrementelse: self.cmd_vel.angular.z = self.angular_speedelif command == 'turn right':#向右转if self.cmd_vel.linear.x != 0:self.cmd_vel.angular.z -= self.angular_incrementelse: self.cmd_vel.angular.z = -self.angular_speedelif command == 'backward': #后退self.cmd_vel.linear.x = -self.speed#为负数值self.cmd_vel.angular.z = 0elif command == 'stop':# 停止# Stop the robot! Publish a Twist message consisting of all zeros. self.cmd_vel = Twist() #速度清零elif command == 'faster': #加速self.speed += self.linear_increment #记录self.angular_speed += self.angular_incrementif self.cmd_vel.linear.x != 0:self.cmd_vel.linear.x += copysign(self.linear_increment, self.cmd_vel.linear.x) #原来是负数就更负加速倒退：原来是正数就更大加速前进if self.cmd_vel.angular.z != 0:self.cmd_vel.angular.z += copysign(self.angular_increment, self.cmd_vel.angular.z) #同上，确保加速正转或加速反转elif command == 'slower':self.speed -= self.linear_incrementself.angular_speed -= self.angular_incrementif self.cmd_vel.linear.x != 0:self.cmd_vel.linear.x -= copysign(self.linear_increment, self.cmd_vel.linear.x) #同上，确保减速前进或减速倒退if self.cmd_vel.angular.z != 0:self.cmd_vel.angular.z -= copysign(self.angular_increment, self.cmd_vel.angular.z) #同上，确保减速正转或减速反转elif command in ['quarter', 'half', 'full']:if command == 'quarter':self.speed = copysign(self.max_speed / 4, self.speed) #最大速度的 1/4 确保和原来方向一样elif command == 'half':self.speed = copysign(self.max_speed / 2, self.speed) #最大速度的 1/2 确保和原来方向一样elif command == 'full':self.speed = copysign(self.max_speed, self.speed)#最大速度确保和原来方向一样if self.cmd_vel.linear.x != 0:self.cmd_vel.linear.x = copysign(self.speed, self.cmd_vel.linear.x)if self.cmd_vel.angular.z != 0:self.cmd_vel.angular.z = copysign(self.angular_speed, self.cmd_vel.angular.z)else:return#确保速度不要超过最大速度限制self.cmd_vel.linear.x = min(self.max_speed, max(-self.max_speed, self.cmd_vel.linear.x)) self.cmd_vel.angular.z = min(self.max_angular_speed, max(-self.max_angular_speed, self.cmd_vel.angular.z))def cleanup(self):# When shutting down be sure to stop the robot!twist = Twist()self.cmd_vel_pub.publish(twist)rospy.sleep(1)if __name__=="__main__":try:VoiceNav()rospy.spin()except rospy.ROSInterruptException:rospy.loginfo("Voice navigation terminated.")

# 中文控制命令#!/usr/bin/env python# -*- coding:utf-8 -*-"""根据语言识别结果的发布话题上的控制命令，发布速度命令到cmd_vel话题上，地盘接收到速度命令控制电机转动前进"""import rospyfrom geometry_msgs.msg import Twist # 速度命令消息类型from std_msgs.msg import Stringfrom math import copysign # # 自定义语音控制的类class VoiceNav:def __init__(self):rospy.init_node('voice_nav') # 节点名rospy.on_shutdown(self.cleanup)# 一些参数，launch文件或配置文件可修改self.max_speed = rospy.get_param("~max_speed", 0.8)# 最大线速度 m/sself.max_angular_speed = rospy.get_param("~max_angular_speed", 1.5)# 最大角速度 self.speed = rospy.get_param("~start_speed", 0.3) # 前进开始速度self.angular_speed = rospy.get_param("~start_angular_speed", 0.5) # 旋转开始角速度self.linear_increment = rospy.get_param("~linear_increment", 0.05) # 线速度每次增加self.angular_increment = rospy.get_param("~angular_increment", 0.4)# 角速度每次增加# 发布频率self.rate = rospy.get_param("~rate", 5)# 指令发布频率r = rospy.Rate(self.rate) # 语音控制标志默认开启self.paused = False# 初始化要发布在cmd_vel话题上的速度类型self.cmd_vel = Twist()# 发布话题话题名消息类型队列大小self.cmd_vel_pub = rospy.Publisher('cmd_vel', Twist, queue_size=5) # 订阅语言识别发布话题类型回调函数rospy.Subscriber('/recognizer/output', String, self.speech_callback)# 控制命令关键字 self.keywords_to_command = {'停止': ['停止', '停下来', '停止前进', '关闭', '睡觉', '停', '禁止', '关闭', 'help me'],'减速': ['减慢', '慢', '减慢速度', '变慢', '慢下来', '慢速', '减速'],'加速': ['加速', '快', '加快速度', '加快', '加速上去', '加速前进'],'前进': ['前进', '向前移动', '向前走', '过来', '全速前进'],'后退': ['后退', '向后移动', '向后走', '倒退','全速倒退'],'正转': ['正转', '正向自转', '正向旋转', '原地自转', '转圈', '正向'],'反转': ['反转', '反向自转', '反向旋转', '原地反转', '反向'],'左转': ['向左转弯', '左转', '向左移动', '向左走', '左转弯'],'右转': ['向右转弯', '右转', '向右移动', '向左走', '左转弯'],'当前位置': ['位置', '当前位置', '报告位置', '汇报位置'],'当前任务': ['任务', '当前任务', '报告任务', '回报任务'],'当前状态': ['当前状态', '状态'],'关语控': ['关闭语音控制', '禁用语音控制'],'开语控': ['打开语音控制', '启用语音控制']}rospy.loginfo("已经准备好接受语言控制命令")# 按发送频率发送命令while not rospy.is_shutdown():self.cmd_vel_pub.publish(self.cmd_vel)r.sleep() # 查找命令def get_command(self, data):# 在控制命令关键字中查找有关控制命令for (command, keywords) in self.keywords_to_command.iteritems():for word in keywords:if data.find(word) > -1:return command# 回调函数根据语言命令执行发送速度指令或其他内容 def speech_callback(self, msg):# 从语音识别话题上得到指令command = self.get_command(msg.data)# 语言识别话题上的消息 # 显示语言识别信息rospy.loginfo("Command: " + str(command))# 语音控制开关if command == '关语控':self.paused = Trueelif command == '开语控':self.paused = Falseif self.paused:return if command == '前进': self.cmd_vel.linear.x = self.speedself.cmd_vel.angular.z = 0elif command == '反转':self.cmd_vel.linear.x = 0self.cmd_vel.angular.z = self.angular_speedelif command == '正转': self.cmd_vel.linear.x = 0self.cmd_vel.angular.z = -self.angular_speedelif command == '左转':if self.cmd_vel.linear.x != 0:self.cmd_vel.angular.z += self.angular_incrementelse: self.cmd_vel.angular.z = self.angular_speedelif command == '右转': if self.cmd_vel.linear.x != 0:self.cmd_vel.angular.z -= self.angular_incrementelse: self.cmd_vel.angular.z = -self.angular_speedelif command == '后退':self.cmd_vel.linear.x = -self.speedself.cmd_vel.angular.z = 0elif command == '停止': # Stop the robot! Publish a Twist message consisting of all zeros. self.cmd_vel = Twist()elif command == '加速':self.speed += self.linear_incrementself.angular_speed += self.angular_incrementif self.cmd_vel.linear.x != 0:self.cmd_vel.linear.x += copysign(self.linear_increment, self.cmd_vel.linear.x)if self.cmd_vel.angular.z != 0:self.cmd_vel.angular.z += copysign(self.angular_increment, self.cmd_vel.angular.z)elif command == '减速':self.speed -= self.linear_incrementself.angular_speed -= self.angular_incrementif self.cmd_vel.linear.x != 0:self.cmd_vel.linear.x -= copysign(self.linear_increment, self.cmd_vel.linear.x)if self.cmd_vel.angular.z != 0:self.cmd_vel.angular.z -= copysign(self.angular_increment, self.cmd_vel.angular.z)else:returnself.cmd_vel.linear.x = min(self.max_speed, max(-self.max_speed, self.cmd_vel.linear.x))self.cmd_vel.angular.z = min(self.max_angular_speed, max(-self.max_angular_speed, self.cmd_vel.angular.z))def cleanup(self):# 停止速度twist = Twist()self.cmd_vel_pub.publish(twist)rospy.sleep(1)if __name__=="__main__":try:VoiceNav()rospy.spin()except rospy.ROSInterruptException:rospy.loginfo("语言导航结束")

进行语言控制实验

a) 仿真实验1. 运行机器人 roslaunch rbx1_bringup fake_turtlebot.launch2. rviz rosrun rviz rviz -d `rospack find rbx1_nav`/sim.rviz3. 控制台rqt_console &4. 运行语音识别 roslaunch rbx1_speech voice_nav_commands.launch或者中文 roslaunch voice_system my_voice_asr.launch5.运行文本控制 roslaunch rbx1_speech turtlebot_voice_nav.launch或者中文 roslaunch voice_system my_sim_turtlebot_voice_nav.launchb) 实体机器人实验1. 运行机器人 roslaunch rbx1_bringup turtlebot_minimal_create.launch2. 控制台 rqt_console &3. 运行语音识别 roslaunch rbx1_speech voice_nav_commands.launch4. 运行文本控制 roslaunch rbx1_speech turtlebot_voice_nav.launchc) 视频解说/watch?v=10ysYZUX_jA

#############################################################

#################################################################

2、语音合成 Festival sound_play 是 ros-indigo-audio-common 的一部分

1) 安装sudo apt-get install ros-indigo-audio-commonsudo apt-get install libasound22) 测试运行服务节点 rosrun sound_play soundplay_node.py添加发音文本 rosrun sound_play say.py "Greetings Humans. Take me to your leader."默认发音： voice_kal_diphone3) 添加新的发音库 a) 查看已经安装的发音库ls /usr/share/festival/voices/english >>> 就一个 voice_kal_diphoneb) 查看全部的 Festival语言库sudo apt-cache search --names-only festvox-*>>>

festvox-don - minimal British English male speaker for festivalfestvox-ellpc11k - Castilian Spanish male speaker for Festivalfestvox-en1 - mbrola-en1 voice support for festivalfestvox-rablpc16k - British English male speaker for festival, 16khz sample ratefestvox-rablpc8k - British English male speaker for festival, 8khz sample ratefestvox-us1 - mbrola-us1 voice support for festivalfestvox-us2 - mbrola-us2 voice support for festivalfestvox-us3 - mbrola-us3 voice support for festivalfestvox-ca-ona-hts - Catalan female speaker for festival, 16kHz HTSfestvox-czech-dita - Czech adult female speaker "dita" for Festivalfestvox-czech-krb - Czech child male speaker "krb" for Festivalfestvox-czech-machac - Czech adult male speaker "machac" for Festivalfestvox-czech-ph - Czech male speaker for Festivalfestvox-hi-nsk - Hindi male speaker for festivalfestvox-italp16k - Italian female speaker for Festivalfestvox-itapc16k - Italian male speaker for Festivalfestvox-kallpc16k - American English male speaker for festival, 16khz sample ratefestvox-kallpc8k - American English male speaker for festival, 8khz sample ratefestvox-kdlpc16k - American English male speaker for festival, 16khz sample ratefestvox-kdlpc8k - American English male speaker for festival, 8khz sample ratefestvox-mr-nsk - Marathi male speaker for festivalfestvox-ru - Russian male speaker for Festivalfestvox-suopuhe-common - Common files for Festival Finnish speakersfestvox-suopuhe-lj - Finnish female speaker for Festivalfestvox-suopuhe-mv - Finnish male speaker for festivalfestvox-te-nsk - Telugu (te) male speaker for festival

c) 安装指定的语音库sudo apt-get install festvox-dond) 测试新的发音指定发音源rosrun sound_play say.py "Welcome to the future" voice_don_diphonee) 播放语言文件声音rosrun sound_play play.py `rospack find rbx1_speech`/sounds/R2D2a.wavf) 播放内建音乐数字1~5rosrun sound_play playbuiltin.py 44) 使用ros节点

# talkback.pyimport rospyfrom std_msgs.msg import Stringfrom sound_play.libsoundplay import SoundClient # SoundClient 客户端 import sysclass TalkBack:def __init__(self, script_path): #初始化类rospy.init_node('talkback') #新建节点rospy.on_shutdown(self.cleanup)#自尽# Set the default TTS voice to use 声音源self.voice = rospy.get_param("~voice", "voice_don_diphone") # Set the wave file path if used 文件路径self.wavepath = rospy.get_param("~wavepath", script_path + "/../sounds")# Create the sound client objectself.soundhandle = SoundClient() #客户端# Wait a moment to let the client connect to the# sound_play serverrospy.sleep(1)# Make sure any lingering sound_play processes are stopped.self.soundhandle.stopAll()# 终止发音# Announce that we are ready for inputself.soundhandle.playWave(self.wavepath + "/R2D2a.wav") #播放声音文件rospy.sleep(1)self.soundhandle.say("Ready", self.voice)#发音rospy.loginfo("Say one of the navigation commands...")# Subscribe to the recognizer output and set the callback functionrospy.Subscriber('/recognizer/output', String, self.talkback) #订阅语音识别话题def talkback(self, msg):# Print the recognized words on the screenrospy.loginfo(msg.data)#回显 # Speak the recognized words in the selected voiceself.soundhandle.say(msg.data, self.voice) #发音 # Uncomment to play one of the built-in sounds#rospy.sleep(2)#self.soundhandle.play(5)# Uncomment to play a wave file#rospy.sleep(2)#self.soundhandle.playWave(self.wavepath + "/R2D2a.wav")def cleanup(self):self.soundhandle.stopAll()rospy.loginfo("Shutting down talkback node...")if __name__=="__main__":try:TalkBack(sys.path[0])rospy.spin()except rospy.ROSInterruptException:rospy.loginfo("Talkback node terminated.")5) 测试roslaunch rbx1_speech talkback.launch

二、国内语音识别

1、科大讯飞 TTS 部分

进去官网 /index.php/mycloud/app/appManage注册帐号下载 SDK 解压示例目录下：/voice/samples/tts_sample含有 .c源文件一个编译文件Ｍakefile# common makefile header

DIR_INC = ../../include # 包含文件路径变量DIR_BIN = ../../bin# 二进制文件库路径变量DIR_LIB = ../../libs# 依赖库路径变量# executive file nameTARGET= tts_sample# 目标文件编译生成的可执行文件名BIN_TARGET = $(DIR_BIN)/$(TARGET) # 可执行文件保存路径在bin目录下CROSS_COMPILE = # 交叉编译CFLAGS = -g -Wall -I$(DIR_INC)# 根据系统设置想要的可执行文件依赖路径可直接修改为（本系统64位） LDFLAGS := -L$(DIR_LIB)/x64ifdef LINUX64LDFLAGS := -L$(DIR_LIB)/x64elseLDFLAGS := -L$(DIR_LIB)/x86 endif# 编译系统依赖LDFLAGS += -lmsc -lrt -ldl -lpthread OBJECTS := $(patsubst %.c,%.o,$(wildcard *.c))$(BIN_TARGET) : $(OBJECTS)$(CROSS_COMPILE)gcc $(CFLAGS) $^ -o $@ $(LDFLAGS)# executive file type%.o : %.c$(CROSS_COMPILE)gcc -c $(CFLAGS) $< -o $@clean:@rm -f *.o $(BIN_TARGET).PHONY:clean

64bit_make.sh 编译环境设置脚本#common makefile foot #编译64位可执行文件make clean;make LINUX64=1 #设置64为标志#设置libmsc.so库搜索路径export LD_LIBRARY_PATH=$(pwd)/../../libs/x64/ #拓展搜索库路径省去 source 64bit_make.sh将 /lib/x64/libmsc.so 拷贝到系统搜索库路径下sudo cp /libs/x64/libmsc.so /usr/bin并修改 Makefile 文件2、将科大 tts 应用到ros节点上1)创建包cd catkin_ws/src catkin_creat_pkg voice_system roscpp rospy std_msgs将科大迅飞包下 include目录下的头文件拷贝到刚刚创建的包voice下的include内msp_cmn.hmsp_errors.hmsp_types.hqise.hqisr.hqtts.h2)创建节点文件根据 tts.sample.c文件修改创建 kdxf_ttf.cpp文件主要就是订阅一个自定义话题上的文本消息传给科大tts 播放wav文件

/** 语音合成（Text To Speech，TTS）技术能够自动将任意文字实时转换为连续的* 自然语音，是一种能够在任何时间、任何地点，向任何人提供语音信息服务的* 高效便捷手段，非常符合信息时代海量数据、动态更新和个性化查询的需求。*/#include "ros/ros.h" //ros系统头文件#include "std_msgs/String.h" #include <stdio.h>#include <string.h>#include <stdlib.h>#include <unistd.h>#include "qtts.h"#include "msp_cmn.h"#include "msp_errors.h"const char* filename = "/home/ewenwan/ME/music/music.wav";const char* playwavpath = "play /home/ewenwan/ME/music/music.wav";/* wav音频头部格式 */typedef struct _wave_pcm_hdr{char riff[4];// = "RIFF" RIFF(Resource Interchange File Format) 资源交换文件规范int size_8; // = FileSize - 8 从下个地址开始到文件尾的总字节数char wave[4];// = "WAVE" WAV文件标志（WAVE）char fmt[4]; // = "fmt " 波形格式标志（fmt ），最后一位空格。int fmt_size; // = 下一个结构体的大小 : 16 过滤字节（一般为00000010H，16d），若为00000012H则说明数据头携带附加信息（见“附加信息”）。short int format_tag; // = PCM : 1 格式种类（值为1时，表示数据为线性PCM编码）short int channels;// = 通道数 : 1通道数，单声道为1，双声道为2int samples_per_sec; // = 采样率 : 8000 | 6000 | 11025 | 16000int avg_bytes_per_sec;// = 每秒字节数 : samples_per_sec * bits_per_sample / 8short int block_align; // = 每采样点字节数 : wBitsPerSample / 8short int bits_per_sample; // = 量化比特数: 8 | 16char data[4];// = "data";int data_size; // = 纯数据长度 : FileSize - 44 } wave_pcm_hdr;/* 默认wav音频头部数据 */wave_pcm_hdr default_wav_hdr = {{ 'R', 'I', 'F', 'F' }, //文件规范0, //后面数据的大小（前面有4个字节）{'W', 'A', 'V', 'E'}, //文件格式{'f', 'm', 't', ' '}, //波形格式标志最后一位空16, //过滤字节数1, //格式种类（值为1时，表示数据为线性PCM编码）1, //通道数，单声道为1，双声道为216000, //采样率 : 8000 | 6000 | 11025 | 1600032000, //每秒字节数 : samples_per_sec * bits_per_sample / 82, //每采样点字节数 : wBitsPerSample / 816, //量化比特数: 8 | 16{'d', 'a', 't', 'a'}, //"data";0 //纯音频数据长度数据长度 : FileSize - 44 （前面有40个字节）};/* 文本合成函数 text, filename, session_begin_params */int text_to_speech(const char* src_text, const char* des_path, const char* params){intret= -1; //返回参数FILE* fp = NULL; //文件句柄文件头const char* sessionID = NULL; //unsigned int audio_len = 0;wave_pcm_hdr wav_hdr= default_wav_hdr; //wav文件头intsynth_status = MSP_TTS_FLAG_STILL_HAVE_DATA;if (NULL == src_text || NULL == des_path) //原文本空或者生成的文件名路径为空返回{printf("params is error!\n");return ret;}fp = fopen(des_path, "wb");//二进制格式写入if (NULL == fp){printf("open %s error.\n", des_path); //打开文件失败return ret;}/* 开始合成合成语音参数标志*/sessionID = QTTSSessionBegin(params, &ret);//开始一次语音合成，分配语音合成资源。if (MSP_SUCCESS != ret)//参数不合规范{printf("QTTSSessionBegin failed, error code: %d.\n", ret);fclose(fp); //关闭文件return ret;}// int MSPAPI QTTSTextPut ( const char * sessionID, const char * textString, unsigned int textLen, const char * params ) // 由QTTSSessionBegin返回的句柄字符串指针,指向待合成的文本字符串合成文本长度,最大支持8192个字节（2730个汉字）本次合成所用的参数，只对本次合成的文本有效。ret = QTTSTextPut(sessionID, src_text, (unsigned int)strlen(src_text), NULL);//写入待合成的文本。if (MSP_SUCCESS != ret){printf("QTTSTextPut failed, error code: %d.\n",ret);QTTSSessionEnd(sessionID, "TextPutError"); //发生错误的话就结束本次语音合成。 fclose(fp); //关闭文件return ret;}printf("正在合成 ...\n");//写入文件头信息大小写入次数文件名fwrite(&wav_hdr, sizeof(wav_hdr) ,1, fp); //添加wav音频头，使用采样率为16000while (1) {/* 获取合成音频 */ // 由QTTSSessionBegin返回的句柄合成音频长度,单位字节合成音频状态成功与否 const void* data = QTTSAudioGet(sessionID, &audio_len, &synth_status, &ret); //获取合成音频。 if (MSP_SUCCESS != ret) //为成功break;if (NULL != data) //合成的音频有内容{fwrite(data, audio_len, 1, fp); //写入内容长度次数文件名wav_hdr.data_size += audio_len; //总音频长度计算data_size大小用于记录头文件 size_8 大小 }if (MSP_TTS_FLAG_DATA_END == synth_status) //合成的音频已经取完break;printf(">");usleep(150*1000);//防止频繁占用CPU}printf("\n");if (MSP_SUCCESS != ret) //获取音频未成功{printf("QTTSAudioGet failed, error code: %d.\n",ret);QTTSSessionEnd(sessionID, "AudioGetError");//发生错误的话就结束本次语音合成。fclose(fp);return ret;}/* 修正wav文件头数据的大小 */wav_hdr.size_8 += wav_hdr.data_size + (sizeof(wav_hdr) - 8); //头文件长度+音频长度 -8 /* 将修正过的数据写回文件头部,音频文件为wav格式 */fseek(fp, 4, 0); //偏移4个字节从第4个字节开始写入 size_8fwrite(&wav_hdr.size_8,sizeof(wav_hdr.size_8), 1, fp); //写入size_8的值fseek(fp, 40, 0); //偏移4个字节 //将文件指针偏移到存储data_size值的位置fwrite(&wav_hdr.data_size,sizeof(wav_hdr.data_size), 1, fp); //写入data_size的值fclose(fp);//关闭文件fp = NULL;/* 合成完毕 */ret = QTTSSessionEnd(sessionID, "Normal"); //结束本次语音合成。if (MSP_SUCCESS != ret){printf("QTTSSessionEnd failed, error code: %d.\n",ret);}return ret;}/*make topic Text To Wave file*/int makeTextToWave(const char* text, const char* filename){int ret = MSP_SUCCESS; //默认返回参数/* appid 应用ID。*/const char* login_params = "appid = 58dbcf6e, work_dir = .";//登录参数,appid与msc库绑定,请勿随意改动/** rdn: 合成音频数字发音方式 0：数值优先（车牌号报读）,1：完全数值(1000 一千),2：完全字符串，3：字符串优先。默认为0 * volume: 合成音频的音量[0,100]，数值越大音量越大。默认为50 * pitch: 合成音频的音调[0,100]，数值越大音调越高。默认为50 * speed: 合成音频对应的语速 [0,100]，数值越大语速越快。默认为50 * voice_name: 合成发音人 xiaoyan yanping jinger yufeng donaldduck babyxu nannan xiaoqian（东北话）* sample_rate: 合成音频采样率* text_encoding: 合成文本编码格式 GB2312；GBK；BIG5；UNICODE；GB18030；UTF8 * background_sound 合成音频中的背景音 0：无背景音乐 1：有背景音乐。默认为0 * 详细参数说明请参阅《讯飞语音云MSC--API文档》*/const char* session_begin_params = "voice_name = donaldduck, text_encoding = utf8, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 0";//const char* filename = "tts_sample.wav"; //合成的语音文件名称//const char* text = "大家好，我叫小明，车牌号沪A1005，两套房子，五百万存款"; //合成文本/* 用户登录 int MSPAPI MSPLogin ( const char * usr,const char * pwd,const char * params )*/ret = MSPLogin(NULL, NULL, login_params);//第一个参数是用户名，第二个参数是密码，第三个参数是登录参数，用户名和密码可在注册获取if (MSP_SUCCESS != ret){printf("MSPLogin failed, error code: %d.\n", ret);goto exit ;//登录失败，退出登录}/* 文本合成 */printf("开始合成 ...\n");/*要合成语音的文本合成后的语音文件wav名字合成语音的参数*/ret = text_to_speech(text, filename, session_begin_params);if (MSP_SUCCESS != ret){printf("text_to_speech failed, error code: %d.\n", ret);}else{printf("合成完毕\n");}exit:MSPLogout(); //退出登录//return 0;}/*播放 wav文件*/void playWav(){system(playwavpath); // 需要安装 sudo apt-get install sox}/*subcribe topic callbace function*/void chatterCallback(const std_msgs::String::ConstPtr& msg){//ROS_INFO("I heard: [%s]", msg->data.c_str());//const char* text=msg->data.c_str();std::cout << " I hear topic text:" << msg->data.c_str() << std::endl;makeTextToWave( msg->data.c_str() , filename );playWav();ROS_INFO("READY TO DO TTS");}int main(int argc, char** argv){const char* start_voice ="科大讯飞在线语音合成启动成功";makeTextToWave( start_voice , filename );playWav();ROS_INFO("READY TO TTS");ros::init(argc, argv, "tts"); //初始化ros系统，在roscore节点管理器注册节点ros::NodeHandle nhd; //节点句柄//节点创建一个发布者//ros::Publisher chatter_pub = nh.advertise<std_msgs::String>("pub_hello_ros", 1000);//创建一个订阅者sub节点句柄话题缓存区函数指针 &callbackfunc 得到ros::Subscriber sub = nhd.subscribe("voice/tts", 100, &chatterCallback);//ros::spin();ros::Rate rate(5); //发布频率while(ros::ok()){ros::spinOnce();//给ROS控制权可以调用一次回调函数rate.sleep();} return 0;}

3)修改CMakeLists 文件共有三个部分：a) 添加头文件搜索 .h文件include_directories( #头文件路径include#cpp文件自建的头文件 ${catkin_INCLUDE_DIRS})b) 添加编译源文件信息add_executable(xftts src/xf_ttf.cpp)c) 添加动态搜索库 .o文件可执行文件名target_link_libraries(xftts ${catkin_LIBRARIES} -lmsc -lrt -ldl -lpthread ) #系统库和外库（由原先的 make文件得来）注意可能会提示找不到 -lmsc可替换为完整路径:/home/ewenwan/ewenwan/catkin_ws/src/voice/libs/x64/libmsc.so #注意更换为自己的路径4) 编译cd catkin_wscatkin_make5) 依赖 play 播放wav文件 // 需要安装 sudo apt-get install sox6) 实验rosrun voice_system xftts发布话题数据rostopic pub -1 voice/xf_tts_topic std_msgs/String "你帅呆了"

2、图灵 NLU 部分语意理解会结合科大tts直接对话交流

1)安装依赖库sudo apt-get install libcurl3 libcurl4-openssl-dev //curl httppose访问sudo apt-get install libjsoncpp1 libjsoncpp-dev //json字符串2)源文件编写cd catkin_ws/src/voice_system新建 tl_nlu.cpp

/** 图灵 NLU 在线语意理解*/#include "ros/ros.h" //ros系统头文件#include "std_msgs/String.h" #include <sstream>#include <jsoncpp/json/json.h> //json字符串#include <curl/curl.h> //curl http访问#include <string>#include <exception>using namespace std;//全局变量int flag = 0;string result;/*解析图灵服务器返回的json字符串*/int parseJsonResonse(string input_str){Json::Value root;Json::Reader reader;bool parsingSuccessful = reader.parse(input_str, root);if( !parsingSuccessful ){std::cout << "Fail to parse the response data" << std::endl;return -1;}const Json::Value code = root["code"];//文本类型标识吗const Json::Value text = root["text"];//返回的结果result = text.asString(); //返回文本内容flag = 1;std::cout << "response code: " << code << std::endl;std::cout << "response text: " << result << std::endl;return 0;}/*将接收到的返回数据写如内存*/int writer(char *data, size_t size, size_t number, string *writerData){if ( writerData == NULL){return -1;}unsigned long len = size * number;writerData->append(data, len);return len;}/*HTTP 请求*/int HttpPostRequest(string input_str){string buffer;std::string strJson = "{" ;strJson += "\"key\" : \"fcabefe6a6ca48ff8c7d4f5dfccf0627\",";strJson += "\"info\" : ";strJson += "\"";strJson += input_str;strJson += "\"";strJson += "}";std::cout << "post json string: " << strJson << std::endl;try{CURL *pCurl = NULL;CURLcode res; //返回状态curl_global_init(CURL_GLOBAL_ALL); //初始化 pCurlpCurl = curl_easy_init();//头if( NULL != pCurl){curl_easy_setopt(pCurl, CURLOPT_TIMEOUT,5); //延迟时间curl_easy_setopt(pCurl, CURLOPT_URL,"/openapi/api");// http头 // 设置http发送的内容类型为JSONcurl_slist *plist = curl_slist_append(NULL, "Content-Type:application/json;charset=UTF-8");curl_easy_setopt(pCurl, CURLOPT_HTTPHEADER, plist);// 设置要POST的JSON数据curl_easy_setopt(pCurl, CURLOPT_POSTFIELDS,strJson.c_str());curl_easy_setopt(pCurl, CURLOPT_WRITEFUNCTION, &writer);curl_easy_setopt(pCurl, CURLOPT_WRITEDATA, &buffer);//执行http请求res = curl_easy_perform(pCurl);//检查错误if (res != CURLE_OK){printf("curl_easy_perform failed:%s\n", curl_easy_strerror(res));}curl_easy_cleanup(pCurl);//清除当前http请求}curl_global_cleanup();//全部清除 }catch (std::exception &ex){printf("!!! curl exception: %s.\n", ex.what());}if(buffer.empty()){std::cout << "!!! ERROR The Tuling server response NULL" << std::endl;}else{parseJsonResonse(buffer);}return 0;}/** 当voice/tl_nlu 话题有消息时，调用HttpPostRequest向图灵服务器发送内容，返回结果。*/void nluCallback(const std_msgs::String::ConstPtr& msg){std::cout << " Your question is :" << msg->data << std::endl;HttpPostRequest(msg->data);ROS_INFO("READY TO DO NLU");}int main(int argc, char** argv){ros::init(argc, argv, "tl_nlu_node"); //初始化ros系统，在roscore节点管理器注册节点ros::NodeHandle nhd; //节点句柄//创建一个订阅者sub节点句柄话题缓存区函数指针 &callbackfunc 得到ros::Subscriber sub = nhd.subscribe("voice/tl_nlu_topic", 20, &nluCallback);//节点创建一个发布者ros::Publisher pub = nhd.advertise<std_msgs::String>("voice/xf_tts_topic", 20);ROS_INFO("READY TO DO NLU");//ros::spin();ros::Rate rate(10); //频率while(ros::ok()){if(flag) //成功获取到返回数据{std_msgs::String msg;msg.data = result;pub.publish(msg); flag = 0 ;}ros::spinOnce();//给ROS控制权可以调用一次回调函数rate.sleep();} return 0;}

3)修改CMakeLists 文件# NLUadd_executable(tlnlu src/tl_nlu.cpp)target_link_libraries(tlnlu ${catkin_LIBRARIES} -lcurl -ljsoncpp) #系统库和外库 curl jsoncpp4) 编译cd catkin_wscatkin_make5) 实验roscorerosrun voice_system xftts // 文本转换成语言rosrun voice_system tlnlu // 语意理解rostopic pub -1 /voice/tl_nlu_topic std_msgs/String "给我说一个绕口令吧"

3、科大讯飞 ASR部分

示例目录下：/voice/samples/ita_record_sample.c源文件拷贝到包的源文件下 catkin_ws/src/voice_system/src.h源文件拷贝到包的包含文件下 catkin_ws/src/voice_system/include修改 ita_record_sample.c文件至 xf_asr.cpp

/** 语音听写(iFly Auto Transform)技术能够实时地将语音转换成对应的文字。* voice Activity Detection 语音活动检测一句话是否说完了*/#include<ros/ros.h>#include<std_msgs/String.h>#include<std_msgs/Int32.h>#include <stdlib.h>#include <stdio.h>#include <string.h>#include <unistd.h>#include "qisr.h"#include "msp_cmn.h"#include "msp_errors.h"#include "speech_recognizer.h"#define FRAME_LEN640 #defineBUFFER_SIZE4096#define ASRFLAG1//控制命令using namespace std; //命名空间//全局变量bool flag = false; //发布语音识别结果话题标志bool recorder_Flag = true; //可录音标志，开始麦克风录音并上传识别string result = ""; //识别结果字符串/* 上传用户字典*/static int upload_userwords(){char*userwords=NULL; //size_tlen=0; //size_tread_len=0; //FILE*fp=NULL; //intret=-1; //fp = fopen("userwords.txt", "rb"); //打开用户字典文本if (NULL == fp){printf("\nopen [userwords.txt] failed! \n"); //打开失败goto upload_exit;}fseek(fp, 0, SEEK_END);//到文件结尾len = ftell(fp); //偏移长度字节相当于文件大小fseek(fp, 0, SEEK_SET); //到文件头userwords = (char*)malloc(len + 1); //申请文件字节+1 大小的缓存区if (NULL == userwords){printf("\nout of memory! \n");//缓存区申请失败goto upload_exit;}read_len = fread((void*)userwords, 1, len, fp); //读取文件写入到缓存区if (read_len != len){printf("\nread [userwords.txt] failed!\n"); //文件应该相等goto upload_exit;}userwords[len] = '\0'; //添加结束符号//用户数据上传数据名称字符串待上传数据缓冲区的起始地址。数据长度(如果是字符串，则不包含'\0')。//params[in] "sub = uup,dtt = userword" 上传用户词表 iat业务 UTF-8 编码MSPUploadData("userwords", userwords, len, "sub = uup, dtt = userword", &ret); //上传if (MSP_SUCCESS != ret){printf("\nMSPUploadData failed ! errorCode: %d \n", ret);//上传失败goto upload_exit;}//退出upload_exit:if (NULL != fp){fclose(fp); //关闭文件fp = NULL;}if (NULL != userwords){free(userwords);//释放缓存区userwords = NULL;}return ret;}/*打印结果*/static void show_result(char *str, char is_over){printf("\rResult: [ %s ]", str);if(is_over)putchar('\n');string s(str);result = s; //得到语音识别结果flag = true; //设置发布话题为真}//全局变量static char *g_result = NULL;static unsigned int g_buffersize = BUFFER_SIZE;//对结果采取的措施void on_result(const char *result, char is_last){if (result) {size_t left = g_buffersize - 1 - strlen(g_result);size_t size = strlen(result);if (left < size) {g_result = (char*)realloc(g_result, g_buffersize + BUFFER_SIZE);if (g_result)g_buffersize += BUFFER_SIZE;else {printf("mem alloc failed\n");return;}}strncat(g_result, result, size);show_result(g_result, is_last);}}/*一段新的语句开始*/void on_speech_begin(){if (g_result){free(g_result);}g_result = (char*)malloc(BUFFER_SIZE);g_buffersize = BUFFER_SIZE;memset(g_result, 0, g_buffersize);printf("Start Listening...\n");}/* 说话结束*/void on_speech_end(int reason){if (reason == END_REASON_VAD_DETECT) //已经检测到语句vad断点了就是说了一句话了{printf("\nSpeaking done \n");recorder_Flag = false; //录音标志录音结束}elseprintf("\nRecognizer error %d\n", reason);}/* 从麦克风识别语音*/static void demo_mic(const char* session_begin_params){int errcode;struct speech_rec iat;struct speech_rec_notifier recnotifier = {on_result,on_speech_begin,on_speech_end};errcode = sr_init(&iat, session_begin_params, SR_MIC, &recnotifier);if (errcode) {printf("speech recognizer init failed\n");return;}errcode = sr_start_listening(&iat);if (errcode) {printf("start listen failed %d\n", errcode);}while(recorder_Flag){ //当可录音标志为真时开始录音并识别sleep(1);}errcode = sr_stop_listening(&iat);if (errcode) {printf("stop listening failed %d\n", errcode);}sr_uninit(&iat);}int asrToText(){int ret = MSP_SUCCESS;int upload_on =1; /* whether upload the user word *//* login params, please do keep the appid correct */const char* login_params = "appid = 58dbcf6e, work_dir = ."; //登陆参数int aud_src = 0; /* from mic or file *//** See "iFlytek MSC Reference Manual"* 识别参数*/const char* session_begin_params ="sub = iat, domain = iat, language = zh_cn, ""accent = mandarin, sample_rate = 16000, ""result_type = plain, result_encoding = utf8";/* Login first. the 1st arg is username, the 2nd arg is password* just set them as NULL. the 3rd arg is login paramertes* 登陆* */ret = MSPLogin(NULL, NULL, login_params);if (MSP_SUCCESS != ret){printf("MSPLogin failed , Error code %d.\n",ret);goto exit; // login fail, exit the program}/*字典上传？*//*printf("Want to upload the user words ? \n0: No.\n1: Yes\n");scanf("%d", &upload_on);if (upload_on){printf("Uploading the user words ...\n");ret = upload_userwords();if (MSP_SUCCESS != ret)goto exit;printf("Uploaded successfully\n");}*//*声音来源麦克分或 wav文件*//*printf("Where the audio comes from?\n0: From a audio file.\n1: From microphone.\n");scanf("%d", &aud_src);//键盘输入if(aud_src != 0) {printf("Demo recognizing the speech from microphone\n");printf("Speak in 15 seconds\n");demo_mic(session_begin_params);printf("15 sec passed\n");} else {printf("Demo recgonizing the speech from a recorded audio file\n");demo_file("wav/iflytek02.wav", session_begin_params); //选者wav文件}*/demo_mic(session_begin_params);exit:MSPLogout(); // Logout...}/** 根据发布的话题来修改录音标志*/void asrCallBack(const std_msgs::Int32::ConstPtr &msg){ ROS_INFO_STREAM("Topic is Subscriber, now starting voice recognition");if(msg->data == ASRFLAG) //话题收到相应的语音识别激活标志{asrToText();}}/* main thread: start/stop record ; query the result of recgonization.* record thread: record callback(data write)* helper thread: ui(keystroke detection)*/int main(int argc, char* argv[]){ros::init(argc, argv, "xf_asr_node"); //初始化ros系统，在roscore节点管理器注册节点ros::NodeHandle nhd; //节点句柄//创建一个订阅者sub节点句柄话题缓存区函数指针 &callbackfunc 得到ros::Subscriber sub = nhd.subscribe("voice/xf_asr_topic", 20, &asrCallBack);//节点创建一个发布者ros::Publisher pub = nhd.advertise<std_msgs::String>("voice/tl_nlu_topic", 20);ROS_INFO("please publish the kinds of std_msgs/Int32 1 to the topic voice/xf_asr_topic");ROS_INFO("waitting for the running sign of voice recognition");//ros::spin();ros::Rate rate(10); //频率while(ros::ok()){if(flag) //成功获取到返回数据{std_msgs::String msg;msg.data = result;pub.publish(msg);recorder_Flag = true;//可开始录音识别flag = false ; //发布识别结果话题标志为否}ros::spinOnce();//给ROS控制权可以调用一次回调函数rate.sleep();} return 0;;}

###CMakeLists.txt文件####

cmake_minimum_required(VERSION 2.8.3)project(voice_system)## Add support for C++11, supported in ROS Kinetic and newer# add_definitions(-std=c++11)## Find catkin macros and libraries## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)## is used, also find other catkin packagesfind_package(catkin REQUIRED COMPONENTSroscpprospystd_msgs)## System dependencies are found with CMake's conventions# find_package(Boost REQUIRED COMPONENTS system)## Uncomment this if the package has a setup.py. This macro ensures## modules and global scripts declared therein get installed## See /doc/api/catkin/html/user_guide/setup_dot_py.html# catkin_python_setup()################################################## Declare ROS messages, services and actions #################################################### To declare and build messages, services or actions from within this## package, follow these steps:## * Let MSG_DEP_SET be the set of packages whose message types you use in## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).## * In the file package.xml:## * add a build_depend tag for "message_generation"## * add a build_depend and a run_depend tag for each package in MSG_DEP_SET## * If MSG_DEP_SET isn't empty the following dependency has been pulled in##but can be declared for certainty nonetheless:##* add a run_depend tag for "message_runtime"## * In this file (CMakeLists.txt):## * add "message_generation" and every package in MSG_DEP_SET to##find_package(catkin REQUIRED COMPONENTS ...)## * add "message_runtime" and every package in MSG_DEP_SET to##catkin_package(CATKIN_DEPENDS ...)## * uncomment the add_*_files sections below as needed##and list every .msg/.srv/.action file to be processed## * uncomment the generate_messages entry below## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)## Generate messages in the 'msg' folder# add_message_files(# FILES# Message1.msg# Message2.msg# )## Generate services in the 'srv' folder# add_service_files(# FILES# Service1.srv# Service2.srv# )## Generate actions in the 'action' folder# add_action_files(# FILES# Action1.action# Action2.action# )## Generate added messages and services with any dependencies listed here# generate_messages(# DEPENDENCIES# std_msgs# )################################################## Declare ROS dynamic reconfigure parameters #################################################### To declare and build dynamic reconfigure parameters within this## package, follow these steps:## * In the file package.xml:## * add a build_depend and a run_depend tag for "dynamic_reconfigure"## * In this file (CMakeLists.txt):## * add "dynamic_reconfigure" to##find_package(catkin REQUIRED COMPONENTS ...)## * uncomment the "generate_dynamic_reconfigure_options" section below##and list every .cfg file to be processed## Generate dynamic reconfigure parameters in the 'cfg' folder# generate_dynamic_reconfigure_options(# cfg/DynReconf1.cfg# cfg/DynReconf2.cfg# )##################################### catkin specific configuration ####################################### The catkin_package macro generates cmake config files for your package## Declare things to be passed to dependent projects## INCLUDE_DIRS: uncomment this if you package contains header files## LIBRARIES: libraries you create in this project that dependent projects also need## CATKIN_DEPENDS: catkin_packages dependent projects also need## DEPENDS: system dependencies of this project that dependent projects also needcatkin_package(# INCLUDE_DIRS include# LIBRARIES voice_system# CATKIN_DEPENDS roscpp rospy std_msgs# DEPENDS system_lib)############# Build ############### Specify additional locations of header files## Your package locations should be listed before other locations# include_directories(include)include_directories( #头文件路径include#cpp文件自建的头文件 ${catkin_INCLUDE_DIRS})## Declare a C++ library# add_library(${PROJECT_NAME}# src/${PROJECT_NAME}/voice_system.cpp# )## Add cmake target dependencies of the library## as an example, code may need to be generated before libraries## either from message generation or dynamic reconfigure# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})## Declare a C++ executable## With catkin_make all packages are built within a single CMake context## The recommended prefix ensures that target names across packages don't collide# add_executable(${PROJECT_NAME}_node src/voice_system_node.cpp)## Rename C++ executable without prefix## The above recommended prefix causes long target names, the following renames the## target back to the shorter version for ease of user use## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")## Add cmake target dependencies of the executable## same as for the library above# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})## Specify libraries to link a library or executable target against# target_link_libraries(${PROJECT_NAME}_node# ${catkin_LIBRARIES}# )# TTSadd_executable(xftts src/xf_ttf.cpp)target_link_libraries(xftts ${catkin_LIBRARIES} /home/ewenwan/ewenwan/catkin_ws/src/voice/libs/x64/libmsc.so #注意更换为自己的路径-lrt -ldl -lpthread ) #系统库和外库 msc rt dl pthread# NLUadd_executable(tlnlu src/tl_nlu.cpp)target_link_libraries(tlnlu ${catkin_LIBRARIES} -lcurl -ljsoncpp) #系统库和外库 curl jsoncpp# ASRadd_executable(xfasr src/xf_asr.cpp src/speech_recognizer.c src/linuxrec.c)target_link_libraries(xfasr ${catkin_LIBRARIES} /home/ewenwan/ewenwan/catkin_ws/src/voice/libs/x64/libmsc.so #注意更换为自己的路径-lrt -ldl -lpthread -lasound) #系统库和外库 msc rt dl pthread asound############### Install ################ all install targets should use catkin DESTINATION variables# See /doc/api/catkin/html/adv_user_guide/variables.html## Mark executable scripts (Python etc.) for installation## in contrast to setup.py, you can choose the destination# install(PROGRAMS# scripts/my_python_script# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}# )## Mark executables and/or libraries for installation# install(TARGETS ${PROJECT_NAME} ${PROJECT_NAME}_node# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}# )## Mark cpp header files for installation# install(DIRECTORY include/${PROJECT_NAME}/# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}# FILES_MATCHING PATTERN "*.h"# PATTERN ".svn" EXCLUDE# )## Mark other files for installation (e.g. launch and bag files, etc.)# install(FILES# # myfile1# # myfile2# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}# )############### Testing ################# Add gtest based cpp test target and link libraries# catkin_add_gtest(${PROJECT_NAME}-test test/test_voice_system.cpp)# if(TARGET ${PROJECT_NAME}-test)# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})# endif()## Add folders to be run by python nosetests# catkin_add_nosetests(test)

###############################

########################

5) 实验roscorerosrun voice_system xftts // 文本转换成语言rosrun voice_system tlnlu // 语意理解rosrun voice_system xfasr // 语音识别rostopic pub -1 voice/xf_asr_topic std_msgs/Int32 1

本内容不代表本网观点和政治立场，如有侵犯你的权益请联系我们处理。

网友评论

网友评论仅供其表达个人看法，并不表明网站立场。