@inproceedings{33734, abstract = {{Many applications require explainable node classification in knowledge graphs. Towards this end, a popular ``white-box'' approach is class expression learning: Given sets of positive and negative nodes, class expressions in description logics are learned that separate positive from negative nodes. Most existing approaches are search-based, generating many candidate class expressions and selecting the best one. However, they often take a long time to find suitable class expressions. In this paper, we cast class expression learning as a translation problem and propose a new family of class expression learning approaches which we dub neural class expression synthesizers. Training examples are ``translated'' into class expressions in a fashion akin to machine translation. Consequently, our synthesizers are not subject to the runtime limitations of search-based approaches. We study three instances of this novel family of approaches based on LSTMs, GRUs, and set transformers, respectively. An evaluation of our approach on four benchmark datasets suggests that it can effectively synthesize high-quality class expressions with respect to the input examples in approximately one second on average. Moreover, a comparison to state-of-the-art approaches suggests that we achieve better F-measures on large datasets. For reproducibility purposes, we provide our implementation as well as pretrained models in our public GitHub repository at https://github.com/dice-group/NeuralClassExpressionSynthesis}}, author = {{Kouagou, N'Dah Jean and Heindorf, Stefan and Demir, Caglar and Ngonga Ngomo, Axel-Cyrille}}, booktitle = {{The Semantic Web - 20th Extended Semantic Web Conference (ESWC 2023)}}, editor = {{Pesquita, Catia and Jimenez-Ruiz, Ernesto and McCusker, Jamie and Faria, Daniel and Dragoni, Mauro and Dimou, Anastasia and Troncy, Raphael and Hertling, Sven}}, keywords = {{Neural network, Concept learning, Description logics}}, location = {{Hersonissos, Crete, Greece}}, pages = {{209--226}}, publisher = {{Springer International Publishing}}, title = {{{Neural Class Expression Synthesis}}}, doi = {{10.1007/978-3-031-33455-9_13}}, volume = {{13870}}, year = {{2023}}, } @article{46243, author = {{Demir, Caglar and Ngonga Ngomo, Axel-Cyrille}}, journal = {{ECML PKDD}}, location = {{Turin}}, title = {{{Clifford Embeddings – A Generalized Approach for Embedding in Normed Algebras}}}, year = {{2023}}, } @article{46251, author = {{Demir, Caglar and Ngonga Ngomo, Axel-Cyrille}}, journal = {{International Joint Conference on Artificial Intelligence}}, location = {{Macau}}, title = {{{Neuro-Symbolic Class Expression Learning}}}, year = {{2023}}, } @inbook{47421, abstract = {{Class expression learning in description logics has long been regarded as an iterative search problem in an infinite conceptual space. Each iteration of the search process invokes a reasoner and a heuristic function. The reasoner finds the instances of the current expression, and the heuristic function computes the information gain and decides on the next step to be taken. As the size of the background knowledge base grows, search-based approaches for class expression learning become prohibitively slow. Current neural class expression synthesis (NCES) approaches investigate the use of neural networks for class expression learning in the attributive language with complement (ALC).
While they show significant improvements over search-based approaches in runtime and quality of the computed solutions, they rely on the availability of pretrained embeddings for the input knowledge base. Moreover, they are not applicable to ontologies in more expressive description logics. In this paper, we propose a novel NCES approach which extends the state of the art to the description logic ALCHIQ(D). Our extension, dubbed NCES2, comes with an improved training data generator and does not require pretrained embeddings for the input knowledge base, as both the embedding model and the class expression synthesizer are trained jointly. Empirical results on benchmark datasets suggest that our approach inherits the scalability of current NCES instances, with the additional advantage that it supports more complex learning problems. NCES2 achieves the highest performance overall when compared to search-based approaches and to its predecessor NCES. We provide our source code, datasets, and pretrained models at https://github.com/dice-group/NCES2.}}, author = {{Kouagou, N'Dah Jean and Heindorf, Stefan and Demir, Caglar and Ngonga Ngomo, Axel-Cyrille}}, booktitle = {{Machine Learning and Knowledge Discovery in Databases: Research Track}}, isbn = {{9783031434204}}, issn = {{0302-9743}}, location = {{Turin}}, publisher = {{Springer Nature Switzerland}}, title = {{{Neural Class Expression Synthesis in ALCHIQ(D)}}}, doi = {{10.1007/978-3-031-43421-1_12}}, year = {{2023}}, } @inbook{46460, author = {{Ngonga Ngomo, Axel-Cyrille and Demir, Caglar and Kouagou, N'Dah Jean and Heindorf, Stefan and Karalis, Nikolaos and Bigerl, Alexander}}, booktitle = {{Compendium of Neurosymbolic Artificial Intelligence}}, pages = {{272--286}}, publisher = {{IOS Press}}, title = {{{Class Expression Learning with Multiple Representations}}}, year = {{2023}}, } @article{46248, author = {{Demir, Caglar and Wiebesiek, Michel and Lu, Renzhong and Ngonga Ngomo, Axel-Cyrille and Heindorf, Stefan}}, journal = {{ECML PKDD}}, location = {{Turin}}, title = {{{LitCQD: Multi-Hop Reasoning in Incomplete Knowledge Graphs with Numeric Literals}}}, year = {{2023}}, } @unpublished{31545, abstract = {{Knowledge graph embedding research has mainly focused on learning continuous representations of entities and relations tailored towards the link prediction problem. Recent results indicate an ever-increasing predictive ability of current approaches on benchmark datasets. However, this effectiveness often comes at the cost of over-parameterization and increased computational complexity. The former induces extensive hyperparameter optimization to mitigate malicious overfitting. The latter magnifies the importance of winning the hardware lottery. Here, we investigate a remedy for the first problem. We propose a technique based on Kronecker decomposition to reduce the number of parameters in a knowledge graph embedding model, while retaining its expressiveness. Through Kronecker decomposition, large embedding matrices are split into smaller embedding matrices during the training process. Hence, embeddings of knowledge graphs are not plainly retrieved but reconstructed on the fly. The decomposition ensures that elementwise interactions between three embedding vectors are extended with interactions within each embedding vector. This implicitly reduces redundancy in embedding vectors and encourages feature reuse.
To quantify the impact of applying Kronecker decomposition on embedding matrices, we conduct a series of experiments on benchmark datasets. Our experiments suggest that applying Kronecker decomposition on embedding matrices leads to improved parameter efficiency on all benchmark datasets. Moreover, empirical evidence suggests that reconstructed embeddings entail robustness against noise in the input knowledge graph. To foster reproducible research, we provide an open-source implementation of our approach, including training and evaluation scripts as well as pretrained models in our knowledge graph embedding framework.}}, author = {{Demir, Caglar and Lienen, Julian and Ngonga Ngomo, Axel-Cyrille}}, booktitle = {{arXiv:2205.06560}}, title = {{{Kronecker Decomposition for Knowledge Graph Embeddings}}}, year = {{2022}}, } @unpublished{31546, abstract = {{In semi-supervised learning, the paradigm of self-training refers to the idea of learning from pseudo-labels suggested by the learner itself. Across various domains, corresponding methods have proven effective and achieve state-of-the-art performance. However, pseudo-labels typically stem from ad-hoc heuristics that rely on the quality of the predictions without guaranteeing their validity. One such method, so-called credal self-supervised learning, maintains pseudo-supervision in the form of sets of (instead of single) probability distributions over labels, thereby allowing for a flexible yet uncertainty-aware labeling. Again, however, there is no justification beyond empirical effectiveness. To address this deficiency, we make use of conformal prediction, an approach that comes with guarantees on the validity of set-valued predictions. As a result, the construction of credal sets of labels is supported by a rigorous theoretical foundation, leading to better calibrated and less error-prone supervision for unlabeled data. Along with this, we present effective algorithms for learning from credal self-supervision. An empirical study demonstrates excellent calibration properties of the pseudo-supervision, as well as the competitiveness of our method on several benchmark datasets.}}, author = {{Lienen, Julian and Demir, Caglar and Hüllermeier, Eyke}}, booktitle = {{arXiv:2205.15239}}, title = {{{Conformal Credal Self-Supervised Learning}}}, year = {{2022}}, } @inbook{33740, author = {{Kouagou, N'Dah Jean and Heindorf, Stefan and Demir, Caglar and Ngonga Ngomo, Axel-Cyrille}}, booktitle = {{The Semantic Web}}, isbn = {{9783031069802}}, issn = {{0302-9743}}, publisher = {{Springer International Publishing}}, title = {{{Learning Concept Lengths Accelerates Concept Learning in ALC}}}, doi = {{10.1007/978-3-031-06981-9_14}}, year = {{2022}}, } @inproceedings{29290, abstract = {{Classifying nodes in knowledge graphs is an important task, e.g., predicting missing types of entities, predicting which molecules cause cancer, or predicting which drugs are promising treatment candidates. While black-box models often achieve high predictive performance, they are only post-hoc and locally explainable and do not allow the learned model to be easily enriched with domain knowledge. Towards this end, learning description logic concepts from positive and negative examples has been proposed. However, learning such concepts often takes a long time, and state-of-the-art approaches provide limited support for literal data values, although they are crucial for many applications.
In this paper, we propose EvoLearner, an evolutionary approach to learn ALCQ(D), which is the attributive language with complement (ALC) paired with qualified cardinality restrictions (Q) and data properties (D). We contribute a novel initialization method for the initial population: starting from positive examples (nodes in the knowledge graph), we perform biased random walks and translate them to description logic concepts. Moreover, we improve support for data properties by maximizing information gain when deciding where to split the data. We show that our approach significantly outperforms the state of the art on the benchmarking framework SML-Bench for structured machine learning. Our ablation study confirms that this is due to our novel initialization method and support for data properties.}}, author = {{Heindorf, Stefan and Blübaum, Lukas and Düsterhus, Nick and Werner, Till and Golani, Varun Nandkumar and Demir, Caglar and Ngonga Ngomo, Axel-Cyrille}}, booktitle = {{WWW}}, pages = {{818--828}}, publisher = {{ACM}}, title = {{{EvoLearner: Learning Description Logics with Evolutionary Algorithms}}}, year = {{2022}}, } @inproceedings{25206, author = {{Demir, Caglar and Ngonga Ngomo, Axel-Cyrille}}, booktitle = {{The Semantic Web - 18th International Conference, {ESWC} 2021, Virtual Event, June 6-10, 2021, Proceedings}}, editor = {{Verborgh, Ruben and Hose, Katja and Paulheim, Heiko and Champin, Pierre-Antoine and Maleshkova, Maria and Corcho, Oscar and Ristoski, Petar and Alam, Mehwish}}, pages = {{409--424}}, publisher = {{Springer}}, title = {{{Convolutional Complex Knowledge Graph Embeddings}}}, doi = {{10.1007/978-3-030-77385-4_24}}, volume = {{12731}}, year = {{2021}}, } @article{25209, author = {{Demir, Caglar and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille}}, journal = {{CoRR}}, title = {{{A Shallow Neural Model for Relation Prediction}}}, volume = {{abs/2101.09090}}, year = {{2021}}, } @article{25213, author = {{Sharma, Arnab and Demir, Caglar and Ngonga Ngomo, Axel-Cyrille and Wehrheim, Heike}}, journal = {{CoRR}}, title = {{{MLCheck: Property-Driven Testing of Machine Learning Models}}}, volume = {{abs/2105.00741}}, year = {{2021}}, } @article{25215, author = {{Demir, Caglar and Ngonga Ngomo, Axel-Cyrille}}, journal = {{CoRR}}, title = {{{Out-of-Vocabulary Entities in Link Prediction}}}, volume = {{abs/2105.12524}}, year = {{2021}}, } @article{25217, author = {{Demir, Caglar and Ngonga Ngomo, Axel-Cyrille}}, journal = {{CoRR}}, title = {{{DRILL: Deep Reinforcement Learning for Refinement Operators in ALC}}}, volume = {{abs/2106.15373}}, year = {{2021}}, } @inproceedings{28350, abstract = {{In recent years, we have observed an increasing amount of software with machine learning components being deployed. This poses the question of quality assurance for such components: how can we validate whether specified requirements are fulfilled by machine-learned software? Current testing and verification approaches either focus on a single requirement (e.g., fairness) or specialize in a single type of machine learning model (e.g., neural networks). In this paper, we propose property-driven testing of machine learning models. Our approach MLCheck encompasses (1) a language for property specification, and (2) a technique for systematic test case generation. The specification language is comparable to property-based testing languages.
Test case generation employs advanced verification technology for a systematic, property-dependent construction of test suites, without additional user-supplied generator functions. We evaluate MLCheck using requirements and datasets from three different application areas (software discrimination, learning on knowledge graphs, and security). Our evaluation shows that, despite its generality, MLCheck can even outperform specialised testing approaches while having a comparable runtime.}}, author = {{Sharma, Arnab and Demir, Caglar and Ngonga Ngomo, Axel-Cyrille and Wehrheim, Heike}}, booktitle = {{Proceedings of the 20th IEEE International Conference on Machine Learning and Applications (ICMLA)}}, publisher = {{IEEE}}, title = {{{MLCheck: Property-Driven Testing of Machine Learning Classifiers}}}, year = {{2021}}, } @inproceedings{29287, abstract = {{Knowledge graph embedding research has mainly focused on the two smallest normed division algebras, $\mathbb{R}$ and $\mathbb{C}$. Recent results suggest that trilinear products of quaternion-valued embeddings can be a more effective means to tackle link prediction. In addition, models based on convolutions on real-valued embeddings often yield state-of-the-art results for link prediction. In this paper, we investigate a composition of convolution operations with hypercomplex multiplications. We propose the four approaches QMult, OMult, ConvQ and ConvO to tackle the link prediction problem. QMult and OMult can be considered as quaternion and octonion extensions of previous state-of-the-art approaches, including DistMult and ComplEx. ConvQ and ConvO build upon QMult and OMult by including convolution operations in a way inspired by the residual learning framework. We evaluated our approaches on seven link prediction datasets, including WN18RR, FB15K-237 and YAGO3-10. Experimental results suggest that the benefits of learning hypercomplex-valued vector representations become more apparent as the size and complexity of the knowledge graph grow. ConvO outperforms state-of-the-art approaches on FB15K-237 in MRR, Hit@1 and Hit@3, while QMult, OMult, ConvQ and ConvO outperform state-of-the-art approaches on YAGO3-10 in all metrics. Results also suggest that link prediction performances can be further improved via prediction averaging. To foster reproducible research, we provide an open-source implementation of our approaches, including training and evaluation scripts as well as pretrained models.}}, author = {{Demir, Caglar and Moussallem, Diego and Heindorf, Stefan and Ngonga Ngomo, Axel-Cyrille}}, booktitle = {{The 13th Asian Conference on Machine Learning, ACML 2021}}, title = {{{Convolutional Hypercomplex Embeddings for Link Prediction}}}, year = {{2021}}, } @article{25350, author = {{Demir, Caglar and Ngonga Ngomo, Axel-Cyrille}}, journal = {{CoRR}}, title = {{{A Physical Embedding Model for Knowledge Graphs}}}, volume = {{abs/2001.07418}}, year = {{2020}}, }