@article{61156,
  abstract     = {{Explainability has become an important topic in computer science and artificial intelligence, leading to a subfield called Explainable Artificial Intelligence (XAI). The goal of providing or seeking explanations is to achieve (better) ‘understanding’ on the part of the explainee. However, what it means to ‘understand’ is still not clearly defined, and the concept itself is rarely the subject of scientific investigation. This conceptual article aims to present a model of forms of understanding for XAI-explanations and beyond. From an interdisciplinary perspective bringing together computer science, linguistics, sociology, philosophy and psychology, a definition of understanding and its forms, assessment, and dynamics during the process of giving everyday explanations are explored. Two types of understanding are considered as possible outcomes of explanations, namely enabledness, ‘knowing how’ to do or decide something, and comprehension, ‘knowing that’ – both in different degrees (from shallow to deep). Explanations regularly start with shallow understanding in a specific domain and can lead to deep comprehension and enabledness of the explanandum, which we see as a prerequisite for human users to gain agency. In this process, the increase of comprehension and enabledness are highly interdependent. Against the background of this systematization, special challenges of understanding in XAI are discussed.}},
  author       = {{Buschmeier, Hendrik and Buhl, Heike M. and Kern, Friederike and Grimminger, Angela and Beierling, Helen and Fisher, Josephine Beryl and Groß, André and Horwath, Ilona and Klowait, Nils and Lazarov, Stefan Teodorov and Lenke, Michael and Lohmer, Vivien and Rohlfing, Katharina and Scharlau, Ingrid and Singh, Amit and Terfloth, Lutz and Vollmer, Anna-Lisa and Wang, Yu and Wilmes, Annedore and Wrede, Britta}},
  journal      = {{Cognitive Systems Research}},
  keywords     = {{understanding, explaining, explanations, explainable, AI, interdisciplinarity, comprehension, enabledness, agency}},
  title        = {{{Forms of Understanding for XAI-Explanations}}},
  doi          = {{10.1016/j.cogsys.2025.101419}},
  volume       = {{94}},
  year         = {{2025}},
}

@inproceedings{57204,
  abstract     = {{In this study on the use of gesture deixis during explanations, a sample of 24 video-recorded dyadic interactions of a board game explanation was analyzed. The relation between the use of gesture deixis by different explainers and their interpretation of explainees' understanding was investigated. In addition, we describe explainers' intra-individual variations related to their interactions with three different explainees consecutively. While we did not find a relation between interpretations of explainees' complete understanding and a decrease in explainers' use of gesture deixis, we demonstrated that the overall use of gesture deixis is related to the process of interactional monitoring and the attendance of a different explainee.}},
  author       = {{Lazarov, Stefan Teodorov and Grimminger, Angela}},
  booktitle    = {{Proceedings of the Annual Meeting of the Cognitive Science Society}},
  keywords     = {{explanation, gesture deixis, monitoring, understanding}},
  location     = {{Rotterdam}},
  title        = {{{Variations in explainers’ gesture deixis in explanations related to the monitoring of explainees’ understanding}}},
  volume       = {{46}},
  year         = {{2024}},
}

@inproceedings{61273,
  abstract     = {{In human-machine explanation interactions, such as tutoring systems or customer support chatbots, it is important for the machine explainer to infer the human user's understanding. Nonverbal signals play an important role in expressing mental states like understanding and confusion in these interactions. However, an individual's expressions may vary depending on other factors. In cases where these factors are unknown, machine learning methods that infer understanding from nonverbal cues become unreliable. Stress, for example, has been shown to affect human expression, but it is not clear from the current research how stress affects the expression of understanding. To address this gap, we design a paradigm that induces understanding and confusion through game rule explanations. During the explanations, self-perceived understanding and confusion are annotated by the participants. A stress condition is also introduced to enable the investigation of changes in the expression of social signals under stress. We conducted a study to validate the stress induction, and participants reported a statistically significant increase in stress during the stress condition compared to the neutral control condition. Additionally, feedback from participants shows that the paradigm is effective in inducing understanding and confusion. This paradigm paves the way for further studies investigating social signals of understanding to improve human-machine explanation interactions for varying contexts.}},
  author       = {{Paletschek, Jonas}},
  booktitle    = {{12th International Conference on Affective Computing and Intelligent Interaction (ACII)}},
  keywords     = {{Understanding, Nonverbal Social Signals, Stress Induction, Explanation, Machine Learning Bias}},
  location     = {{Glasgow}},
  publisher    = {{IEEE}},
  title        = {{{A Paradigm to Investigate Social Signals of Understanding and Their Susceptibility to Stress}}},
  doi          = {{10.1109/ACII63134.2024.00040}},
  year         = {{2024}},
}

@article{48543,
  abstract     = {{Explanation has been identified as an important capability for AI-based systems, but research on systematic strategies for achieving understanding in interaction with such systems is still sparse. Negation is a linguistic strategy that is often used in explanations. It creates a contrast space between the affirmed and the negated item that enriches explaining processes with additional contextual information. While negation in human speech has been shown to lead to higher processing costs and worse task performance in terms of recall or action execution when used in isolation, it can decrease processing costs when used in context. So far, it has not been considered as a guiding strategy for explanations in human-robot interaction. We conducted an empirical study to investigate the use of negation as a guiding strategy in explanatory human-robot dialogue, in which a virtual robot explains tasks and possible actions to a human explainee to solve them in terms of gestures on a touchscreen. Our results show that negation vs. affirmation 1) increases processing costs measured as reaction time and 2) increases several aspects of task performance. While there was no significant effect of negation on the number of initially correctly executed gestures, we found a significantly lower number of attempts—measured as breaks in the finger movement data before the correct gesture was carried out—when being instructed through a negation. We further found that the gestures significantly resembled the presented prototype gesture more following an instruction with a negation as opposed to an affirmation. Also, the participants rated the benefit of contrastive vs. affirmative explanations significantly higher. Repeating the instructions decreased the effects of negation, yielding similar processing costs and task performance measures for negation and affirmation after several iterations. We discuss our results with respect to possible effects of negation on linguistic processing of explanations and limitations of our study.}},
  author       = {{Groß, André and Singh, Amit and Banh, Ngoc Chi and Richter, Birte and Scharlau, Ingrid and Rohlfing, Katharina J. and Wrede, Britta}},
  journal      = {{Frontiers in Robotics and AI}},
  keywords     = {{HRI, XAI, negation, understanding, explaining, touch interaction, gesture}},
  title        = {{{Scaffolding the human partner by contrastive guidance in an explanatory human-robot dialogue}}},
  doi          = {{10.3389/frobt.2023.1236184}},
  volume       = {{10}},
  year         = {{2023}},
}

@inproceedings{46067,
  abstract     = {{The study investigates two different ways of guiding the addressee of an explanation, an explainee, through action demonstration: contrastive and non-contrastive. Their effect was tested on attention to specific action elements (goal) as well as on event memory. In an eye-tracking experiment, participants were shown different motion videos that were either contrastive or non-contrastive with respect to the segments of movement presentation. Given that everyday action demonstration is often multimodal, the stimuli were created with respect to their visual and verbal presentation. For visual presentation, a video combined two movements in a contrastive (e.g., Up-motion following a Down-motion) or non-contrastive way (e.g., two Up-motions following each other). For verbal presentation, each video was combined with a sequence of instruction descriptions in the form of negative (i.e., contrastive) or assertive (i.e., non-contrastive) guidance. It was found that a) attention to the event goal increased for this condition in the later time window, and b) participants’ recall of the event was facilitated when a visually contrastive motion was combined with a verbal contrast.}},
  author       = {{Singh, Amit and Rohlfing, Katharina J.}},
  booktitle    = {{Proceedings of the Annual Meeting of the Cognitive Science Society}},
  keywords     = {{Attention, negation, contrastive guidance, eye-movements, action understanding, event representation}},
  location     = {{Sydney}},
  publisher    = {{Cognitive Science Society}},
  title        = {{{Contrastiveness in the context of action demonstration: an eye-tracking study on its effects on action perception and action recall}}},
  volume       = {{45}},
  year         = {{2023}},
}

@article{17189,
  abstract     = {{Alignment is a phenomenon observed in human conversation: Dialog partners' behavior converges in many respects. Such alignment has been proposed to be automatic and the basis for communicating successfully. Recent research on human-computer dialog promotes a mediated communicative design account of alignment according to which the extent of alignment is influenced by interlocutors' beliefs about each other. Our work aims at adding to these findings in two ways. (a) Our work investigates alignment of manual actions, instead of lexical choice. (b) Participants interact with the iCub humanoid robot, instead of an artificial computer dialog system. Our results confirm that alignment also takes place in the domain of actions. We were not able to replicate the results of the original study in general in this setting, but in accordance with its findings, participants with a high questionnaire score for emotional stability and participants who are familiar with robots align their actions more to a robot they believe to be basic than to one they believe to be advanced. Regarding alignment over the course of an interaction, the extent of alignment seems to remain constant, when participants believe the robot to be advanced, but it increases over time, when participants believe the robot to be a basic version.}},
  author       = {{Vollmer, Anna-Lisa and Rohlfing, Katharina and Wrede, Britta and Cangelosi, Angelo}},
  issn         = {{1875-4791}},
  journal      = {{International Journal of Social Robotics}},
  keywords     = {{Human-robot interaction, Alignment, Robot social learning, Action understanding}},
  number       = {{2}},
  pages        = {{241--252}},
  publisher    = {{Springer-Verlag}},
  title        = {{{Alignment to the Actions of a Robot}}},
  doi          = {{10.1007/s12369-014-0252-0}},
  volume       = {{7}},
  year         = {{2015}},
}

