The dialogue manager now works with the boto3 library instead of the lex_node; the logger saves the script data and also the single user audio file during the interaction. Removed all references to the lex_node.
QT138-ILab committed Feb 18, 2020
1 parent bf813eb commit a0b4563
Showing 10 changed files with 124 additions and 81 deletions.
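
In short, the dialogue node now calls Amazon Lex directly through boto3's lex-runtime client instead of going through the lex_node ROS service. A minimal sketch of that call pattern, assuming AWS credentials are already configured and a Lex V1 bot exists; the bot name, alias, user ID, and audio file below are placeholders, not the project's real values:

import boto3

# Lex V1 runtime client, matching the region hard-coded in dialogue.py.
lex_client = boto3.client('lex-runtime', region_name='us-west-2')

# Raw 16 kHz, 16-bit, mono PCM audio captured from the microphone.
with open('utterance.raw', 'rb') as f:
    audio_bytes = f.read()

response = lex_client.post_content(
    botName='MyBot',
    botAlias='my_alias',
    userId='some-user',
    contentType='audio/x-l16; sample-rate=16000; channel-count=1',
    accept='text/plain; charset=utf-8',
    inputStream=audio_bytes)

# The response is a plain dict; dialogue.py branches on these same keys.
print(response['dialogState'], response.get('message'))
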
3 changes: 0 additions & 3 deletions README.md
@@ -52,9 +52,6 @@ $ git clone https://github.com/micolspitale93/qt_robot_nuc.git
$ sudo apt-get install ros-kinetic-audio-common-msgs
$ sudo apt-get install ros-kinetic-aws-common
$ sudo apt-get install ros-kinetic-aws-ros1-common
$ sudo apt-get install ros-kinetic-lex-common
$ sudo apt-get install ros-kinetic-lex-common-msgs
$ sudo apt-get install ros-kinetic-lex-node
~~~~
7. Make everything
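Because the apt-installed lex ROS packages go away, boto3 itself has to be available to Python; the commit does not touch the install instructions, but on this setup that would typically be a single extra step, e.g.:

$ pip install boto3
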
8 changes: 2 additions & 6 deletions cordial_dialogue/CMakeLists.txt
@@ -7,11 +7,8 @@ catkin_python_setup()
find_package(catkin REQUIRED COMPONENTS
aws_common
aws_ros1_common
lex_common
lex_common_msgs
roscpp
std_msgs
lex_node
actionlib_msgs
message_generation
)
@@ -24,8 +21,8 @@ generate_messages(

catkin_package(
INCLUDE_DIRS include
LIBRARIES cordial_lex
CATKIN_DEPENDS aws_ros1_common lex_common_msgs roscpp std_msgs
LIBRARIES cordial_dialogue
CATKIN_DEPENDS aws_ros1_common roscpp std_msgs
#DEPENDS system_lib
)

@@ -39,7 +36,6 @@ include_directories(
include
${catkin_INCLUDE_DIRS}
${AWSSDK_INCLUDE_DIR}
${lex_common_INCLUDE_DIR}
)

include_directories(
7 changes: 0 additions & 7 deletions cordial_dialogue/launch/dialogue.launch
@@ -3,13 +3,6 @@


<arg name="config_file" default="$(find cordial_dialogue)/config/configuration.yaml"/>
<arg name="user_id" value="$(optenv LEX_USER_ID)" doc="UserID sent when communicating with the Lex Bot, can be any string. Defauls to value in config .yaml if unset"/>
<arg name="lex_node_name" value="lex_node"/>
<include file="$(find lex_node)/launch/lex_node.launch" >
<arg name="config_file" value="$(find cordial_dialogue)/config/configuration.yaml"/>
</include>
<arg name="node_name" value="$(arg lex_node_name)"/>

<arg name="use_lex" default="true"/>
<group if="$(arg use_lex)">
<node pkg="cordial_dialogue" type="dialogue.py" name="dialogue" output="screen"/>
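With the lex_node include and its config file gone, nothing in the launch file points at Lex any more; the dialogue node relies on boto3's own credential resolution instead. A short sketch of what that amounts to (standard boto3 behaviour, not something this commit configures explicitly):

import boto3

# boto3 looks up credentials on its own: AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
# environment variables, then ~/.aws/credentials, then an instance/role provider.
# Only the region is passed explicitly, matching the value used in dialogue.py.
lex_client = boto3.client('lex-runtime', region_name='us-west-2')
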
4 changes: 0 additions & 4 deletions cordial_dialogue/package.xml
@@ -13,8 +13,6 @@
<buildtool_depend>catkin</buildtool_depend>
<build_depend>aws_common</build_depend>
<build_depend>aws_ros1_common</build_depend>
<build_depend>lex_common</build_depend>
<build_depend>lex_common_msgs</build_depend>
<build_depend>roscpp</build_depend>
<build_depend>std_msgs</build_depend>
<build_depend>actionlib</build_depend>
@@ -26,8 +24,6 @@
<run_depend>actionlib</run_depend>
<run_depend>aws_common</run_depend>
<run_depend>aws_ros1_common</run_depend>
<run_depend>lex_common</run_depend>
<run_depend>lex_common_msgs</run_depend>
<run_depend>roscpp</run_depend>
<run_depend>std_msgs</run_depend>
</package>
Binary file modified cordial_dialogue/scripts/data/1.ogg
Binary file modified cordial_dialogue/scripts/data/1.wav
Empty file.
77 changes: 48 additions & 29 deletions cordial_dialogue/scripts/dialogue.py
@@ -5,13 +5,23 @@
import sys
import pyaudio
import wave
import struct
import rospy
import actionlib
from lex_common_msgs.srv import *
from std_msgs.msg import String
from audio_common_msgs.msg import AudioData
from cordial_manager.msg import *
import boto3
import json
from datetime import datetime


USER_ID = "manuela"
BOT_NAME = "QTDemo"
BOT_ALIAS = "qt_sample_demo"

SAMPLERATE = 16000
FORMAT_SIZE = pyaudio.paInt16
CHUNK_SIZE = 1024


class DialogueServer():
@@ -69,6 +79,7 @@ def __init__(self):
self.dialogue_process_done = False
self.interaction_message = ""
self.interaction_continue = True
self.lex_client = boto3.client('lex-runtime', region_name='us-west-2')
# Declare subscribers and publishers
rospy.Subscriber('/cordial/microphone/audio', AudioData, self.handle_audio_data, queue_size=1)
self.text_publisher = rospy.Publisher('cordial/dialogue/script', String, queue_size=1)
@@ -79,57 +90,65 @@ def handle_audio_data(self, data):
self.prompt_message = data.data

def handle_lex_response(self,lex_response):
if len(lex_response.text_response) > 0:
print("The lex response is: ", lex_response)
#Stored the user data:
#if lex_response.intent_name == "questionnairePhase":
print(lex_response.slots)
#lex_response = json.loads(lex_response)
if len(lex_response["message"]) > 0:
print("The lex response is: ", lex_response["message"])
#Stored the user data
self.dialogue_data_publisher.publish(str(lex_response["sessionAttributes"]))
#When lex failed in understanding the user
if lex_response.dialog_state == 'Failed':
if lex_response["dialogState"] == 'Failed':
self.interaction_message = 'failed_understanding'
self.interaction_continue = False
print("In Failed dialogue state")
elif lex_response.dialog_state == 'Fulfilled':
elif lex_response["dialogState"] == 'Fulfilled':
self.interaction_message = 'success'
self.interaction_continue = False
print("In Fulfilled dialogue state, the response is:" + lex_response.text_response)
self.text_publisher.publish(lex_response.text_response)
print("In Fulfilled dialogue state, the response is:" + lex_response["message"])
self.text_publisher.publish(lex_response["message"])
else:
print("In general dialogue state, the response is:" + lex_response.text_response)
self.text_publisher.publish(lex_response.text_response)
print("In general dialogue state, the response is:" + lex_response["message"])
self.text_publisher.publish(lex_response["message"])
self.dialogue_process_done = True

def send_audioToAWS_client(self,audiodatarequest):
print("Starting client request..")
#print("The audio data request is: ", audiodatarequest)
audiodata = audiodatarequest
rospy.wait_for_service("/lex_node/lex_conversation")
p = pyaudio.PyAudio()
file_name = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
outdir = "/home/qtrobot/catkin_ws/src/cordial-public/cordial_logger/scripts/data/audio_user"
wf = wave.open(outdir + "/"+ file_name + ".wav", 'wb')
wf.setnchannels(1)
wf.setsampwidth(p.get_sample_size(FORMAT_SIZE))
wf.setframerate(SAMPLERATE)
wf.setnframes(CHUNK_SIZE)
wf.writeframes(b''.join(audiodata))
wf.close()
wav_file = wave.open(outdir + "/"+ file_name + ".wav", 'rb')
try:
send_data_service = rospy.ServiceProxy("/lex_node/lex_conversation", AudioTextConversation)
lex_response = send_data_service(content_type='audio/x-l16; sample-rate=16000; channel-count=1',
accept_type='text/plain; charset=utf-8',
text_request=None,
audio_request=AudioData(data=audiodata))
lex_response = self.lex_client.post_content(botName = BOT_NAME,
botAlias = BOT_ALIAS,
userId = USER_ID,
contentType = 'audio/x-l16; sample-rate=16000; channel-count=1',
accept = 'text/plain; charset=utf-8',
inputStream = audiodata)
self.handle_lex_response(lex_response)
except rospy.ServiceException, e:
print "Service call failed: %s" %e

def send_textToAWS(self,textdatarequest):
print("Starting client request..")
#textdata = textdatarequest.data
textdata = textdatarequest # FOR TESTING
rospy.wait_for_service("/lex_node/lex_conversation")
textdata = textdatarequest
try:
send_data_service = rospy.ServiceProxy("/lex_node/lex_conversation", AudioTextConversation)
lex_response = send_data_service(content_type='text/plain; charset=utf-8',
accept_type='text/plain; charset=utf-8',
text_request= textdata,
audio_request= AudioData(data=''))
lex_response = self.lex_client.post_content(botName = BOT_NAME,
botAlias = BOT_ALIAS,
userId = USER_ID,
contentType = 'text/plain; charset=utf-8',
accept = 'text/plain; charset=utf-8',
inputStream = textdata)
self.handle_lex_response(lex_response)
except rospy.ServiceException, e:
print "Service call failed: %s" %e




if __name__ == '__main__':
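One caveat with the new client code above: the try/except blocks still catch rospy.ServiceException, but boto3 raises botocore exceptions, so a failed post_content call would now propagate uncaught. A hedged sketch of an equivalent guard, assuming botocore is importable alongside boto3 (it is installed with it):

import boto3
import botocore.exceptions

lex_client = boto3.client('lex-runtime', region_name='us-west-2')

def send_audio_to_lex(audio_bytes, bot_name, bot_alias, user_id):
    # ClientError covers API-side failures (unknown bot, missing permissions,
    # throttling); BotoCoreError covers client-side problems such as missing
    # credentials or connection errors.
    try:
        return lex_client.post_content(
            botName=bot_name,
            botAlias=bot_alias,
            userId=user_id,
            contentType='audio/x-l16; sample-rate=16000; channel-count=1',
            accept='text/plain; charset=utf-8',
            inputStream=audio_bytes)
    except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
        print("Lex request failed: %s" % e)
        return None
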
102 changes: 71 additions & 31 deletions cordial_logger/scripts/recorded_data.py
@@ -11,84 +11,122 @@
from sensor_msgs.msg import Image
from audio_common_msgs.msg import AudioData
from qt_robot_speaker.msg import PlayRequest
from qt_nuitrack_app.msg import Skeletons
from datetime import datetime
import wave
import pyaudio
import base64
import ast

CHANNEL = 1
SAMPLERATE = 16000
FORMAT_SIZE = pyaudio.paInt16
CHUNK_SIZE = 1024
FPS = 23

class RecordingManager():

def __init__(self):
self.recorded_audio_frames = []
self.recorded_audio_common_frames = []
self.recorded_video_frames = []
self.recorded_video_head_frames = []
self.script_data = []
self.first_video_frame = True
self.is_video_recording = True
self.is_audio_recording = True
self.first_video_head_frame = True
self.is_video_recording = False
self.is_audio_recording = False
self.is_data_recording = False
self.cv_bridge = CvBridge()
self.fps = FPS
rospy.Subscriber("/cordial/chest_camera/video", Image, self.handle_recorded_video, queue_size=1)
#rospy.Subscriber("/cordial/recording/audio/data", PlayRequest, self.handle_recorded_audio, queue_size=1)
rospy.Subscriber("/audio", AudioData, self.handle_recorded_audio_common, queue_size=1)
#rospy.Subscriber("/cordial/dialogue/data", String, self.handle_user_prompt, queue_size=1)
rospy.Subscriber("/audio/channel0", AudioData, self.handle_recorded_audio_common, queue_size=1)
rospy.Subscriber("/cordial/dialogue/data", String, self.handle_user_prompt, queue_size=1)
#audio direction
rospy.Subscriber("/camera/color/image_raw", Image, self.handle_recorded_video_head, queue_size=1)
rospy.Subscriber("/qt_nuitrack_app/skeletons", Skeletons, self.handle_recorded_skeleton_position, queue_size=1)

rospy.Subscriber("/cordial/recording/audio", Bool, self.handle_trigger_recorded_audio, queue_size=1)
rospy.Subscriber("/cordial/recording/video", Bool, self.handle_trigger_recorded_video, queue_size=1)
rospy.Subscriber("/cordial/recording/data", Bool, self.handle_trigger_recorded_data, queue_size=1)


def handle_recorded_skeleton_position(self, data):
return

def handle_user_prompt(self,data):
if self.is_data_recording:
transcript = ast.literal_eval(data.data)
self.script_data.append(transcript)
return

def handle_trigger_recorded_data(self, data):
if not data.data:
self.is_data_recording = False
file_name = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
outdir = "/home/qtrobot/catkin_ws/src/cordial-public/cordial_logger/scripts/data/logfile/script/"
with open(outdir + file_name+'.json', 'w') as outfile:
json.dump(self.script_data, outfile)
else:
self.is_data_recording = True


def handle_recorded_video_head(self,data):
if self.is_video_recording:
if self.first_video_head_frame:
print("First frame")
# Create the writer for the video
fps = data.header.stamp.secs
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
file_name = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
path = "/home/qtrobot/catkin_ws/src/cordial-public/cordial_logger/scripts/data/video_head/"
#fps = FPS
self.video_head_data = cv2.VideoWriter(path + file_name + ".avi", fourcc , fps, (data.width, data.height), True)
frame = self.cv_bridge.imgmsg_to_cv2(data, "bgr8")
self.first_video_head_frame = False
else:
frame = self.cv_bridge.imgmsg_to_cv2(data, "bgr8")
try:
self.recorded_video_head_frames.append(frame)
except:
print("Not writing!!")

def handle_trigger_recorded_video(self, data):
if not data.data:
print("Stop video recording")
for frame in self.recorded_video_frames:
self.video_data.write(frame)
for frame in self.recorded_video_head_frames:
self.video_head_data.write(frame)
self.is_video_recording = False
self.video_data.release()
self.video_head_data.release()
cv2.destroyAllWindows()
#self.text_file.close()
elif data.data:
self.is_video_recording = True


def handle_recorded_video(self, data):
if self.is_video_recording:
if self.first_video_frame:
print("First frame")
# Create the writer for the video
fps = data.header.stamp.secs
#self.fps = data.header.stamp.secs
fps = FPS
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
file_name = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
path = "/home/qtrobot/catkin_ws/src/cordial-public/cordial_logger/scripts/data/video/"
self.video_data = cv2.VideoWriter(path + file_name + ".avi", fourcc , fps, (data.width, data.height), True)
frame = self.cv_bridge.imgmsg_to_cv2(data, "rgb8")
self.first_video_frame = False
# Create the txt file to store the data
text_file = open(path+file_name+'.txt','wb')
text_file.write("Start")
text_file.close()
self.text_file = open(path+file_name+'.txt','a')

else:
frame = self.cv_bridge.imgmsg_to_cv2(data, "rgb8")
try:
self.recorded_video_frames.append(frame)
retval, buffer = cv2.imencode('.jpg', frame)
jpg_as_text = base64.b64encode(buffer)
#self.text_file.write('\n Frame:' + str(jpg_as_text))
except:
print("Not writing!!")


def handle_recorded_audio(self,data):
self.recorded_audio_frames.append(data.data)
self.sample_width = data.audio_frame


def handle_recorded_audio_common(self,data):
#print(data.data)
#P = 2
#S = data.data
#result = [x * P for x in S]
#print(result)
self.recorded_audio_frames.append(data.data)
#self.sample_width = data.audio_frame
self.recorded_audio_common_frames.append(data.data)

def handle_trigger_recorded_audio(self, data):
if not data.data:
@@ -101,8 +139,10 @@ def handle_trigger_recorded_audio(self, data):
wf.setsampwidth(p.get_sample_size(FORMAT_SIZE))
wf.setframerate(SAMPLERATE)
wf.setnframes(CHUNK_SIZE)
wf.writeframes(b''.join(self.recorded_audio_frames))
wf.writeframes(b''.join(self.recorded_audio_common_frames))
wf.close()
elif data.data:
self.is_audio_recording = True



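The script log above depends on the dialogue node publishing str() of the Lex sessionAttributes dict and this node turning it back into a dict with ast.literal_eval before dumping it to the .json file. A minimal sketch of the same round trip using JSON on both ends, shown only as an alternative to the committed approach; the attribute values are made up for illustration:

import json

# Dialogue-node side: serialise the session attributes explicitly.
session_attributes = {'questionnairePhase': 'q3', 'lastAnswer': 'yes'}  # placeholder values
payload = json.dumps(session_attributes)   # this string would go into the String message

# Logger side: parse the message data back into a dict and append it to the
# script log that handle_trigger_recorded_data() later writes out with json.dump.
script_data = []
script_data.append(json.loads(payload))
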
4 changes: 3 additions & 1 deletion cordial_sensor/scripts/sensor.py
@@ -102,14 +102,16 @@ def __init__(self):
self.microphone_publisher = rospy.Publisher("cordial/listening", Bool, queue_size=1)
self.camera_record_publisher = rospy.Publisher("cordial/recording/video", Bool, queue_size=1)
self.microphone_record_publisher = rospy.Publisher("cordial/recording/audio", Bool, queue_size=1)
self.data_record_publisher = rospy.Publisher("cordial/recording/data", Bool, queue_size=1)


def handle_listening_start(self, data):
self.microphone_publisher.publish(True)

def handle_recording_start(self, data):
self.camera_record_publisher.publish(True)
#self.camera_record_publisher.publish(True)
self.microphone_record_publisher.publish(True)
self.data_record_publisher.publish(True)
return

def handle_listening_done(self,data):
