@article{61123,
  abstract     = {{Knowledge graphs are used by a growing number of applications to represent structured data. Hence, evaluating the veracity of assertions in knowledge graphs—dubbed fact checking—is currently a challenge of growing importance. However, manual fact checking is commonly impractical due to the sheer size of knowledge graphs. This paper is a systematic survey of recent works on automatic fact checking with a focus on knowledge graphs. We present recent fact-checking approaches, the varied sources they use as background knowledge, and the features they rely upon. Finally, we draw conclusions pertaining to possible future research directions in fact checking knowledge graphs.}},
  author       = {{Qudus, Umair and Röder, Michael and Saleem, Muhammad and Ngonga Ngomo, Axel-Cyrille}},
  issn         = {{0360-0300}},
  journal      = {{ACM Computing Surveys}},
  keywords     = {{fact checking, knowledge graphs, fact-checkers, check worthiness, evidence retrieval, trust, veracity}},
  publisher    = {{Association for Computing Machinery (ACM)}},
  title        = {{{Fact Checking Knowledge Graphs -- A Survey}}},
  doi          = {{10.1145/3749838}},
  volume       = {{58}},
  year         = {{2025}},
}

@inproceedings{61753,
  abstract     = {{This paper presents LOLA, a massively multilingual large language model trained on more than 160 languages using a sparse Mixture-of-Experts Transformer architecture. Our architectural and implementation choices address the challenge of harnessing linguistic diversity while maintaining efficiency and avoiding the common pitfalls of multilinguality. Our analysis of the evaluation results shows competitive performance in natural language generation and understanding tasks. Additionally, we demonstrate how the learned expert-routing mechanism exploits implicit phylogenetic linguistic patterns to potentially alleviate the curse of multilinguality. We provide an in-depth look at the training process, an analysis of the datasets, and a balanced exploration of the model's strengths and limitations. As an open-source model, LOLA promotes reproducibility and serves as a robust foundation for future research. Our findings enable the development of compute-efficient multilingual models with strong, scalable performance across languages.}},
  author       = {{Srivastava, Nikit and Kuchelev, Denis and Moteu Ngoli, Tatiana and Shetty, Kshitij and Röder, Michael and Zahera, Hamada Mohamed Abdelsamee and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{Proceedings of the 31st International Conference on Computational Linguistics}},
  editor       = {{Rambow, Owen and Wanner, Leo and Apidianaki, Marianna and Al-Khalifa, Hend and Di Eugenio, Barbara and Schockaert, Steven}},
  pages        = {{6420--6446}},
  publisher    = {{Association for Computational Linguistics}},
  title        = {{{LOLA -- An Open-Source Massively Multilingual Large Language Model}}},
  year         = {{2025}},
}

@incollection{63507,
  author       = {{Pandit, Gaurav and Röder, Michael and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{Lecture Notes in Computer Science}},
  isbn         = {{9783031945748}},
  issn         = {{0302-9743}},
  publisher    = {{Springer Nature Switzerland}},
  title        = {{{Evaluating Approximate Nearest Neighbour Search Systems on Knowledge Graph Embeddings}}},
  doi          = {{10.1007/978-3-031-94575-5_4}},
  year         = {{2025}},
}

@inproceedings{63572,
  author       = {{Demir, Caglar and Yekini, Moshood Olawale and Röder, Michael and Mahmood, Yasir and Ngonga Ngomo, Axel-Cyrille}},
  title        = {{{Tree-Based OWL Class Expression Learner over Large Graphs}}},
  booktitle    = {{Lecture Notes in Computer Science}},
  location     = {{Porto}},
  publisher    = {{Springer Nature Switzerland}},
  isbn         = {{9783032060655}},
  issn         = {{0302-9743}},
  doi          = {{10.1007/978-3-032-06066-2_29}},
  year         = {{2025}},
}

@inproceedings{63575,
  author       = {{Kapoor, Sourabh and Sharma, Arnab and Röder, Michael and Demir, Caglar and Ngonga Ngomo, Axel-Cyrille}},
  title        = {{{Robustness Evaluation of Knowledge Graph Embedding Models Under Non-targeted Attacks}}},
  booktitle    = {{Lecture Notes in Computer Science}},
  publisher    = {{Springer Nature Switzerland}},
  isbn         = {{9783031945748}},
  issn         = {{0302-9743}},
  doi          = {{10.1007/978-3-031-94575-5_15}},
  year         = {{2025}},
}

@inproceedings{63573,
  author       = {{Memariani, Adel and Röder, Michael and Sharma, Arnab and Demir, Caglar and Ngonga Ngomo, Axel-Cyrille}},
  title        = {{{Link Prediction Under Non-targeted Attacks: Do Soft Labels Always Help?}}},
  booktitle    = {{Lecture Notes in Computer Science}},
  publisher    = {{Springer Nature Switzerland}},
  isbn         = {{9783032095268}},
  issn         = {{0302-9743}},
  doi          = {{10.1007/978-3-032-09527-5_6}},
  year         = {{2025}},
}

@inproceedings{63574,
  author       = {{Zhang, Quannian and Röder, Michael and Srivastava, Nikit and Kouagou, N'Dah Jean and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{Proceedings of the Knowledge Capture Conference 2025}},
  publisher    = {{ACM}},
  title        = {{{Explainable Benchmarking through the Lense of Concept Learning}}},
  doi          = {{10.1145/3731443.3771359}},
  year         = {{2025}},
}

@inproceedings{56983,
  abstract     = {{Detecting the veracity of a statement automatically is a challenge the world is grappling with due to the vast amount of data spread across the web. Verifying a given claim typically entails validating it within the framework of supporting evidence like a retrieved piece of text. Classifying the stance of the text with respect to the claim is called stance classification. Despite advancements in automated fact-checking, most systems still rely on a substantial quantity of labeled training data, which can be costly. In this work, we avoid the costly training or fine-tuning of models by reusing pre-trained large language models together with few-shot in-context learning. Since we do not train any model, our approach ExPrompt is lightweight, demands fewer resources than other stance classification methods and can serve as a modern baseline for future developments. At the same time, our evaluation shows that our approach is able to outperform former state-of-the-art stance classification approaches regarding accuracy by at least 2 percent. Our scripts and data used in this paper are available at https://github.com/dice-group/ExPrompt.}},
  author       = {{Qudus, Umair and Röder, Michael and Vollmers, Daniel and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{Proceedings of the 33rd ACM International Conference on Information and Knowledge Management}},
  isbn         = {{979-8-4007-0436-9}},
  keywords     = {{Stance Classification, Few-shot in-context learning, Pre-trained large language models}},
  location     = {{Boise, ID, USA}},
  pages        = {{3994--3999}},
  publisher    = {{ACM}},
  title        = {{{ExPrompt: Augmenting Prompts Using Examples as Modern Baseline for Stance Classification}}},
  doi          = {{10.1145/3627673.3679923}},
  volume       = {{9}},
  year         = {{2024}},
}

@inproceedings{57240,
  abstract     = {{Validating assertions before adding them to a knowledge graph is an essential part of its creation and maintenance. Due to the sheer size of knowledge graphs, automatic fact-checking approaches have been developed. These approaches rely on reference knowledge to decide whether a given assertion is correct. Recent hybrid approaches achieve good results by including several knowledge sources. However, it is often impractical to provide a sheer quantity of textual knowledge or generate embedding models to leverage these hybrid approaches. We present FaVEL, an approach that uses algorithm selection and ensemble learning to amalgamate several existing fact-checking approaches that rely solely on a reference knowledge graph and, hence, use fewer resources than current hybrid approaches. For our evaluation, we create updated versions of two existing datasets and a new dataset dubbed FaVEL-DS. Our evaluation compares our approach to 15 fact-checking approaches—including the state-of-the-art approach HybridFC—on 3 datasets. Our results demonstrate that FaVEL outperforms all other approaches significantly by at least 0.04 in terms of the area under the ROC curve. Our source code, datasets, and evaluation results are open-source and can be found at https://github.com/dice-group/favel.}},
  author       = {{Qudus, Umair and Röder, Michael and Tatkeu Pekarou, Franck Lionel and Morim da Silva, Ana Alexandra and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{EKAW 2024}},
  editor       = {{Rospocher, Marco}},
  keywords     = {{fact checking, ensemble learning, transfer learning, knowledge management}},
  location     = {{Amsterdam, Netherlands}},
  title        = {{{FaVEL: Fact Validation Ensemble Learning}}},
  year         = {{2024}},
}

@inproceedings{57278,
  author       = {{Morim da Silva, Ana Alexandra and Srivastava, Nikit and Moteu Ngoli, Tatiana and Röder, Michael and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille}},
  title        = {{{Benchmarking Low-Resource Machine Translation Systems}}},
  booktitle    = {{Proceedings of the Seventh Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2024)}},
  publisher    = {{Association for Computational Linguistics}},
  doi          = {{10.18653/v1/2024.loresmt-1.18}},
  year         = {{2024}},
}

@inproceedings{50797,
  author       = {{Röder, Michael and Kuchelev, Denis and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{Knowledge Graphs and Semantic Web}},
  editor       = {{Ortiz-Rodriguez, Fernando and Villazón-Terrazas, Boris and Tiwari, Sanju and Bobed, Carlos}},
  isbn         = {{978-3-031-47745-4}},
  keywords     = {{sail dice roeder kuchelev ngonga}},
  pages        = {{183--198}},
  publisher    = {{Springer Nature Switzerland}},
  title        = {{{A Topic Model for the Data Web}}},
  doi          = {{10.1007/978-3-031-47745-4_14}},
  year         = {{2023}},
}

@phdthesis{54607,
  author       = {{Röder, Michael}},
  keywords     = {{dice roeder}},
  school       = {{Paderborn University}},
  title        = {{{Automating the Discovery of Linking Candidates}}},
  doi          = {{10.17619/UNIPB/1-1666}},
  year         = {{2023}},
}

@inproceedings{50796,
  abstract     = {{Verifying assertions is an essential part of creating and maintaining knowledge graphs. Most often, this task cannot be carried out manually due to the sheer size of modern knowledge graphs. Hence, automatic fact-checking approaches have been proposed over the last decade. These approaches aim to compute automatically whether a given assertion is correct or incorrect. However, most fact-checking approaches are binary classifiers that fail to consider the volatility of some assertions, i.e., the fact that such assertions are only valid at certain times or for specific time intervals. Moreover, the few approaches able to predict when an assertion was valid (i.e., time-point prediction approaches) rely on manual feature engineering. This paper presents TemporalFC, a temporal fact-checking approach that uses multiple sources of background knowledge to assess the veracity and temporal validity of a given assertion. We evaluate TemporalFC on two datasets and compare it to the state of the art in fact-checking and time-point prediction. Our results suggest that TemporalFC outperforms the state of the art on the fact-checking task by 0.13 to 0.15 in terms of Area Under the Receiver Operating Characteristic curve and on the time-point prediction task by 0.25 to 0.27 in terms of Mean Reciprocal Rank. Our code is open-source and can be found at https://github.com/dice-group/TemporalFC.}},
  author       = {{Qudus, Umair and Röder, Michael and Kirrane, Sabrina and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{The Semantic Web – ISWC 2023}},
  editor       = {{Payne, Terry R. and Presutti, Valentina and Qi, Guilin and Poveda-Villalón, María and Stoilos, Giorgos and Hollink, Laura and Kaoudi, Zoi and Cheng, Gong and Li, Juanzi}},
  keywords     = {{knowgraphs enexa sail nebulaproject dice ngonga saleem roeder qudus}},
  pages        = {{465--483}},
  publisher    = {{Springer International Publishing}},
  title        = {{{TemporalFC: A Temporal Fact Checking approach over Knowledge Graphs}}},
  doi          = {{10.1007/978-3-031-47240-4_25}},
  volume       = {{14265}},
  year         = {{2023}},
}

@inproceedings{32509,
  abstract     = {{We consider fact-checking approaches that aim to predict the veracity of assertions in knowledge graphs. Five main categories of fact-checking approaches for knowledge graphs have been proposed in the recent literature, of which each is subject to partially overlapping limitations. In particular, current text-based approaches are limited by manual feature engineering. Path-based and rule-based approaches are limited by their exclusive use of knowledge graphs as background knowledge, and embedding-based approaches suffer from low accuracy scores on current fact-checking tasks. We propose a hybrid approach—dubbed HybridFC—that exploits the diversity of existing categories of fact-checking approaches within an ensemble learning setting to achieve a significantly better prediction performance. In particular, our approach outperforms the state of the art by 0.14 to 0.27 in terms of Area Under the Receiver Operating Characteristic curve on the FactBench dataset. Our code is open-source and can be found at https://github.com/dice-group/HybridFC.}},
  author       = {{Qudus, Umair and Röder, Michael and Saleem, Muhammad and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{The Semantic Web -- ISWC 2022}},
  editor       = {{Sattler, Ulrike and Hogan, Aidan and Keet, Maria and Presutti, Valentina}},
  isbn         = {{978-3-031-19433-7}},
  keywords     = {{fact checking, ensemble learning, knowledge graphs, veracity}},
  location     = {{Hangzhou, China}},
  pages        = {{462--480}},
  publisher    = {{Springer International Publishing}},
  title        = {{{HybridFC: A Hybrid Fact-Checking Approach for Knowledge Graphs}}},
  doi          = {{10.1007/978-3-031-19433-7_27}},
  year         = {{2022}},
}

@inproceedings{57287,
  author       = {{Syed, Zafar Habeeb and Srivastava, Nikit and Röder, Michael and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{Proceedings of the ISWC 2019 Satellite Tracks (Posters \& Demonstrations, Industry, and Outrageous Ideas)}},
  editor       = {{Suárez-Figueroa, Mari Carmen and Cheng, Gong and Gentile, Anna Lisa and Guéret, Christophe and Keet, Maria and Bernstein, Abraham}},
  keywords     = {{dice group_aksw ngonga roeder srivastava syed}},
  pages        = {{201--204}},
  publisher    = {{Springer International Publishing}},
  title        = {{{COPAAL – An Interface for Explaining Facts using Corroborative Paths}}},
  volume       = {{2456}},
  year         = {{2019}},
}

@incollection{57286,
  author       = {{Jalota, Rricha and Srivastava, Nikit and Vollmers, Daniel and Speck, René and Röder, Michael and Usbeck, Ricardo and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{Rich Search and Discovery for Research Datasets}},
  keywords     = {{dice jalota ngonga roeder speck srivastava vollmers}},
  publisher    = {{SAGE Publications}},
  title        = {{{Finding Datasets in Publications: The University of Paderborn Approach}}},
  year         = {{2019}},
}

@inproceedings{57288,
  author       = {{Speck, René and Röder, Michael and Conrads, Felix and Rebba, Hyndavi and Romiyo, Catherine Camilla and Salakki, Gurudevi and Suryawanshi, Rutuja and Ahmed, Danish and Srivastava, Nikit and Mahajan, Mohit and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{Semantic Web Evaluation Challenge}},
  keywords     = {{2018 ahmed conrads dice fox group_aksw mahajan ngonga projecthobbit rebba roeder romiyo salakki simba speck srivastava suryawanshi}},
  pages        = {{39--51}},
  publisher    = {{Springer International Publishing}},
  title        = {{{Open Knowledge Extraction Challenge 2018}}},
  year         = {{2018}},
}

@article{55062,
  author       = {{Ngonga Ngomo, Axel-Cyrille and Röder, Michael and Moussallem, Diego and Usbeck, Ricardo and Speck, René}},
  eprint       = {{1710.08691}},
  eprinttype   = {{arXiv}},
  journal      = {{arXiv preprint arXiv:1710.08691}},
  title        = {{{BENGAL: an automatic benchmark generator for entity recognition and linking}}},
  year         = {{2017}},
}

@inproceedings{50761,
  author       = {{Röder, Michael and Both, Andreas and Hinneburg, Alexander}},
  booktitle    = {{Proceedings of the Eighth International Conference on Web Search and Data Mining, Shanghai, February 2--6}},
  keywords     = {{group_aksw SIMBA sys:relevantFor:infai sys:relevantFor:bis roeder palmetto}},
  title        = {{{Exploring the Space of Topic Coherence Measures}}},
  year         = {{2015}},
}

@inproceedings{55051,
  author       = {{Usbeck, Ricardo and Röder, Michael and Ngonga Ngomo, Axel-Cyrille and Baron, Ciro and Both, Andreas and Brümmer, Martin and Ceccarelli, Diego and Cornolti, Marco and Cherix, Didier and Eickmann, Bernd}},
  booktitle    = {{Proceedings of the 24th international conference on World Wide Web}},
  pages        = {{1133--1143}},
  title        = {{{GERBIL: general entity annotator benchmarking framework}}},
  year         = {{2015}},
}

