@article{48543, abstract = {{Explanation has been identified as an important capability for AI-based systems, but research on systematic strategies for achieving understanding in interaction with such systems is still sparse. Negation is a linguistic strategy that is often used in explanations. It creates a contrast space between the affirmed and the negated item that enriches explaining processes with additional contextual information. While negation in human speech has been shown to lead to higher processing costs and worse task performance in terms of recall or action execution when used in isolation, it can decrease processing costs when used in context. So far, it has not been considered a guiding strategy for explanations in human-robot interaction. We conducted an empirical study to investigate the use of negation as a guiding strategy in explanatory human-robot dialogue, in which a virtual robot explains tasks and possible actions to a human explainee, who solves them by performing gestures on a touchscreen. Our results show that negation vs. affirmation 1) increases processing costs, measured as reaction time, and 2) improves several aspects of task performance. While there was no significant effect of negation on the number of initially correctly executed gestures, we found a significantly lower number of attempts—measured as breaks in the finger movement data before the correct gesture was carried out—when participants were instructed through a negation. We further found that the gestures resembled the presented prototype gesture significantly more closely following an instruction with a negation as opposed to an affirmation. Also, the participants rated the benefit of contrastive vs. affirmative explanations significantly higher. Repeating the instructions decreased the effects of negation, yielding similar processing costs and task performance measures for negation and affirmation after several iterations. We discuss our results with respect to possible effects of negation on linguistic processing of explanations and limitations of our study.}}, author = {{Groß, A. and Singh, Amit and Banh, Ngoc Chi and Richter, B. and Scharlau, Ingrid and Rohlfing, Katharina J. and Wrede, B.}}, journal = {{Frontiers in Robotics and AI}}, keywords = {{HRI, XAI, negation, understanding, explaining, touch interaction, gesture}}, title = {{{Scaffolding the human partner by contrastive guidance in an explanatory human-robot dialogue}}}, doi = {{10.3389/frobt.2023.1236184}}, volume = {{10}}, year = {{2023}}, } @article{43437, abstract = {{In virtual reality (VR), participants may not always have hands, bodies, eyes, or even voices—using VR helmets and two controllers, participants control an avatar through virtual worlds that do not necessarily obey familiar laws of physics; moreover, the avatar’s bodily characteristics may not neatly match our bodies in the physical world. Despite these limitations and specificities, humans get things done through collaboration and the creative use of the environment. While multiuser interactive VR is attracting greater numbers of participants, there are currently few attempts to analyze the in situ interaction systematically. This paper proposes a video-analytic, detail-oriented methodological framework for studying virtual reality interaction.
Using multimodal conversation analysis, the paper investigates a nonverbal, embodied, two-person interaction: two players in a survival game strive to gesturally resolve a misunderstanding regarding an in-game mechanic—however, both of their microphones are turned off for the duration of play. The players’ inability to resort to complex language to resolve this issue results in a dense sequence of back-and-forth activity involving gestures, object manipulation, gaze, and body work. Most crucially, timing and modified repetitions of previously produced actions turn out to be the key to overcoming both technical and communicative challenges. The paper analyzes these action sequences, demonstrates how they generate intended outcomes, and proposes a vocabulary to speak about these types of interaction more generally. The findings demonstrate the viability of multimodal analysis of VR interaction, shed light on unique challenges of analyzing interaction in virtual reality, and generate broader methodological insights about the study of nonverbal action.}}, author = {{Klowait, Nils}}, issn = {{2578-1863}}, journal = {{Human Behavior and Emerging Technologies}}, keywords = {{Human-Computer Interaction, General Social Sciences, Social Psychology, Virtual Reality: Multimodality, Nonverbal Interaction, Search Sequence, Gesture, Co-Operative Action, Goodwin, Ethnomethodology}}, pages = {{1--15}}, publisher = {{Hindawi Limited}}, title = {{{On the Multimodal Resolution of a Search Sequence in Virtual Reality}}}, doi = {{10.1155/2023/8417012}}, volume = {{2023}}, year = {{2023}}, } @inproceedings{17557, abstract = {{Previous work by [1] studied gesture-speech interaction in adults. [1] focussed on temporal and semantic coordination of gesture and speech and found that while adult speech is mostly coordinated (or redundant) with gestures, semantic coordination increases the temporal synchrony. These observations do not necessarily hold for children (in particular with respect to iconic gestures, see [2]), where the speech and gesture systems are still under development. We studied the semantic and temporal coordination of speech and gesture in 4-year-old children using a corpus of 40 children producing action descriptions in task-oriented dialogues. In particular, we examined what kinds of information are transmitted verbally vs. non-verbally and how they are related. To account for this, we extended the semantic features (SFs) developed in [3] for object descriptions in order to include the semantics of actions. We coded the SFs on the children’s speech and gestures separately using video data. In our presentation, we will focus on the quantitative distribution of SFs across gesture and speech. Our results indicate that speech and gestures of 4-year-olds are less integrated than those of the adults, although there is a large variability among the children. We will discuss the results with respect to the cognitive processes (e.g., visual memory, language) underlying children’s abilities at this stage of development. Our work paves the way for a cognitive architecture of speech-gesture interaction in preschoolers, which to our knowledge is missing so far.
}}, author = {{Abramov, Olga and Kopp, Stefan and Nemeth, Anne and Kern, Friederike and Mertens, Ulrich and Rohlfing, Katharina}}, booktitle = {{KOGWIS2018: Computational Approaches to Cognitive Science}}, keywords = {{Speech-gesture integration, semantic features}}, title = {{{Towards a Computational Model of Child Gesture-Speech Production}}}, year = {{2018}}, } @article{17184, abstract = {{There is ongoing discussion on the function of the early production of gestures with regard to whether they reduce children's cognitive demands and free their capacity to perform other tasks (e.g., Goldin-Meadow & Wagner, 2005) or whether young children point in order to share their interest or to elicit information from their caregivers (e.g., Begus & Southgate, 2012; Liszkowski, Carpenter, Henning, Striano & Tomasello, 2004). The different assumptions lead to diverse predictions regarding infants' gestural or multimodal behavior in recurring situations, in which some objects are familiar and others are unfamiliar. To examine these different predictions, we observed 14 children aged between 14 and 16 months biweekly in a semi-experimental situation with a caregiver and explored how children's verbal and gestural behaviors change as a function of their familiarization with objects. We split the children into two groups based on their reported vocabulary size at 21 months of age (larger vs. smaller vocabulary).
We found that children with a larger vocabulary at 21 months had an increase in their pointing with words toward unfamiliar objects as well as in their total number of words, whereas for children with smaller vocabularies we did not find differences in relation to their familiarization with objects. We discuss these findings in terms of a social-pragmatic use of pointing gestures.}}, author = {{Grimminger, Angela and Lüke, Carina and Ritterfeld, Ute and Liszkowski, Ulf and Rohlfing, Katharina}}, issn = {{2191-9194}}, journal = {{Frühe Bildung}}, keywords = {{gesture, pointing, familiarity, individual differences}}, number = {{2}}, pages = {{91--97}}, publisher = {{Hogrefe & Huber Publishers}}, title = {{{Effekte von Objekt-Familiarisierung auf die frühe gestische Kommunikation. Individuelle Unterschiede in Hinblick auf den späteren Wortschatz}}}, doi = {{10.1026/2191-9186/a000257}}, volume = {{5}}, year = {{2016}}, } @article{17200, abstract = {{This research investigated infants’ online perception of give-me gestures during observation of a social interaction. In the first experiment, goal-directed eye movements of 12-month-olds were recorded as they observed a give-and-take interaction in which an object is passed from one individual to another. Infants’ gaze shifts from the passing hand to the receiving hand were significantly faster when the receiving hand formed a give-me gesture relative to when it was presented as an inverted hand shape. Experiment 2 revealed that infants’ goal-directed gaze shifts were not based on different affordances of the two receiving hands. Two additional control experiments further demonstrated that differences in infants’ online gaze behavior were not mediated by an attentional preference for the give-me gesture. Together, our findings provide evidence that properties of social action goals influence infants’ online gaze during action observation. The current studies demonstrate that infants have expectations about well-formed object transfer actions between social agents. We suggest that 12-month-olds are sensitive to social goals within the context of give-and-take interactions while observing from a third-party perspective.}}, author = {{Elsner, Claudia and Bakker, Marta and Rohlfing, Katharina and Gredebäck, Gustaf}}, issn = {{0022-0965}}, journal = {{Journal of Experimental Child Psychology}}, keywords = {{Give-me gesture, Infant, Anticipation, Eye movement, Gesture, Social interaction}}, pages = {{280--294}}, publisher = {{Elsevier BV}}, title = {{{Infants' online perception of give-and-take interactions}}}, doi = {{10.1016/j.jecp.2014.05.007}}, volume = {{126}}, year = {{2014}}, } @inproceedings{17259, abstract = {{Learning is a social endeavor, in which the learner generally receives support from his/her social partner(s). In developmental research, even though tutors’/adults’ behavior modifications in their speech, gestures, and motions have been extensively studied, studies barely consider the recipient’s (i.e., the child’s) perspective in the analysis of the adult’s presentation. In addition, the variability in parental behavior, i.e., the fact that not every parent modifies her/his behavior in the same way, has received less fine-grained analysis. In contrast, in this paper, we adopt an interactional perspective, investigating the loop between the tutor’s and the learner’s actions.
With this approach, we aim both at discovering the levels and features of variability and at achieving a better understanding of how they come about within the course of the interaction. For our analysis, we used a combination of (1) qualitative investigation derived from ethnomethodological Conversation Analysis (CA), (2) semi-automatic computational 2D hand tracking, and (3) a mathematically based visualization of the data. Our analysis reveals that tutors not only shape their demonstrations differently with regard to the intended recipient per se (adult-directed vs. child-directed), but most importantly that the learner’s feedback during the presentation is consequential for the concrete ways in which the presentation is carried out.}}, author = {{Pitsch, Karola and Vollmer, Anna-Lisa and Fritsch, Jannik and Wrede, Britta and Rohlfing, Katharina and Sagerer, Gerhard}}, booktitle = {{Gesture and Speech in Interaction}}, keywords = {{gaze, gesture, Multimodal, adult-child interaction}}, title = {{{On the loop of action modification and the recipient's gaze in adult-child interaction}}}, year = {{2009}}, } @inproceedings{17278, abstract = {{This paper investigates the influence of feedback provided by an autonomous robot (BIRON) on users’ discursive behavior. A user study is described in which users show objects to the robot. The results of the experiment indicate that the robot’s verbal feedback utterances cause the humans to adapt their own way of speaking. The changes in users’ verbal behavior are due to their beliefs about the robot’s knowledge and abilities. In this paper, these beliefs are identified and grouped. Moreover, the data imply variations in user behavior regarding gestures. Unlike speech, the robot was not able to give feedback with gestures. Due to this lack of feedback, users did not seem to have a consistent mental representation of the robot’s abilities to recognize gestures. As a result, changes between different gestures are interpreted to be unconscious variations accompanying speech.}}, author = {{Lohse, Manja and Rohlfing, Katharina and Wrede, Britta and Sagerer, Gerhard}}, issn = {{1050-4729}}, keywords = {{discursive behavior, autonomous robot, BIRON, man-machine systems, robot abilities, robot knowledge, user gestures, robot verbal feedback utterance, speech processing, user verbal behavior, service robots, human-robot interaction, human computer interaction, gesture recognition}}, pages = {{3481--3486}}, title = {{{“Try something else!” — When users change their discursive behavior in human-robot interaction}}}, doi = {{10.1109/ROBOT.2008.4543743}}, year = {{2008}}, }