@inproceedings{59054,
  author       = {{Firmansyah, Asep Fajar and Zahera, Hamada Mohamed Abdelsamee and Sherif, Mohamed and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{ESWC2025}},
  isbn         = {{978-3-031-94575-5}},
  keywords     = {{firmansyah mousallem ngonga sherif zahera}},
  pages        = {{133--151}},
  publisher    = {{Springer Nature Switzerland}},
  title        = {{{ANTS: Abstractive Entity Summarization in Knowledge Graphs}}},
  doi          = {{10.1007/978-3-031-94575-5_8}},
  year         = {{2025}},
}

@inproceedings{62119,
  author       = {{Ihtassine, Reda and Firmansyah, Asep Fajar and Srivastava, Nikit and Ali, Manzoor and Ngonga Ngomo, Axel-Cyrille and Sherif, Mohamed}},
  booktitle    = {{Proceedings of the 12th Knowledge Capture Conference 2025, {K-CAP} 2025, The Thirteenth International Conference on Knowledge Capture, December 10 - 12, 2025, Dayton, Ohio, USA}},
  keywords     = {{Srivastava ali dice enexa firmansyah ihtassine ngonga sailproject sherif whale}},
  publisher    = {{ACM}},
  title        = {{{NL2LS: LLM-based Automatic Linking of Knowledge Graphs}}},
  year         = {{2025}},
}

@inbook{61210,
  abstract     = {{Knowledge graphs (KGs) differ significantly over multiple different versions of the same data source. They also often contain blank nodes that do not have a constant identifier over all versions. Linking such blank nodes from different versions is a challenging task. Previous works propose different approaches to create signatures for all blank nodes based on named nodes in their neighborhood to match blank nodes with similar signatures. However, these works struggle to find a good mapping when the difference between the KGs’ versions grows too large. In this work, we propose Blink, an embedding-based approach for blank node linking. Blink merges two KGs’ versions and embeds the merged graph into a latent vector space based on translational embeddings and subsequently matches the closest pairs of blank nodes from different graphs. We evaluate our approach using real-world datasets against state-of-the-art approaches by computing the blank node matching for isomorphic graphs and graphs that contain triple changes (i.e., added or removed triples). The results indicate that Blink achieves perfect accuracy for isomorphic graphs. For graph versions that contain changes, such as having up to 20% of triples removed in one version, Blink still produces a mapping with an Optimal Mapping Deviation Ratio of under 1%. These results show that Blink leads to a better linking of KGs over different versions and similar graphs adhering to the linked data guidelines.}},
  author       = {{Becker, Alexander and Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{Lecture Notes in Computer Science}},
  isbn         = {{9783031778438}},
  issn         = {{0302-9743}},
  location     = {{Baltimore, USA}},
  publisher    = {{Springer Nature Switzerland}},
  title        = {{{Blink: Blank Node Matching Using Embeddings}}},
  doi          = {{10.1007/978-3-031-77844-5_12}},
  year         = {{2024}},
}

@inproceedings{54084,
  author       = {{Karalis, Nikolaos and Bigerl, Alexander and Heidrich, Liss and Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{ESWC}},
  keywords     = {{bigerl dice enexa heidrich karalis ngonga sail sherif}},
  title        = {{{Efficient Evaluation of Conjunctive Regular Path Queries Using Multi-way Joins}}},
  year         = {{2024}},
}

@inproceedings{61219,
  author       = {{Kumar, Ajay and Naumann, Marius and Henne, Kevin and Sherif, Mohamed}},
  booktitle    = {{Joint Proceedings of Posters, Demos, Workshops, and Tutorials of the 20th International Conference on Semantic Systems co-located with 20th International Conference on Semantic Systems (SEMANTiCS 2024), Amsterdam, The Netherlands, September 17-19, 2024}},
  editor       = {{Garijo, Daniel and Gentile, Anna Lisa and Kurteva, Anelia and Mannocci, Andrea and Osborne, Francesco and Vahdati, Sahar}},
  keywords     = {{kumar sherif enexa climatebowl ingrid simba dice whale}},
  location     = {{Amsterdam, The Netherlands}},
  publisher    = {{CEUR-WS.org}},
  title        = {{{PCFWebUI: Data-driven WebUI for holistic decarbonization based on PCF-Tracking}}},
  volume       = {{3759}},
  year         = {{2024}},
}

@inproceedings{55094,
  author       = {{Zahera, Hamada Mohamed Abdelsamee and Manzoor, Ali and Sherif, Mohamed and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{SEMANTiCS}},
  keywords     = {{TRR318 climatebowl colide dice enexa kiam manzoor moussallem ngonga sailproject sherif simba zahera}},
  title        = {{{Generating SPARQL from Natural Language Using Chain-of-Thoughts Prompting}}},
  year         = {{2024}},
}

@inbook{46516,
  abstract     = {{Linked knowledge graphs build the backbone of many data-driven applications such as search engines, conversational agents and e-commerce solutions. Declarative link discovery frameworks use complex link specifications to express the conditions under which a link between two resources can be deemed to exist. However, understanding such complex link specifications is a challenging task for non-expert users of link discovery frameworks. In this paper, we address this drawback by devising NMV-LS, a language model-based verbalization approach for translating complex link specifications into natural language. NMV-LS relies on the results of rule-based link specification verbalization to apply continuous training on T5, a large language model based on the Transformerarchitecture. We evaluated NMV-LS on English and German datasets using well-known machine translation metrics such as BLUE, METEOR, ChrF++ and TER. Our results suggest that our approach achieves a verbalization performance close to that of humans and outperforms state of the art approaches. Our source code and datasets are publicly available at https://github.com/dice-group/NMV-LS.}},
  author       = {{Ahmed, Abdullah Fathi Ahmed and Firmansyah, Asep Fajar and Sherif, Mohamed and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{Natural Language Processing and Information Systems}},
  isbn         = {{9783031353192}},
  issn         = {{0302-9743}},
  publisher    = {{Springer Nature Switzerland}},
  title        = {{{Explainable Integration of Knowledge Graphs Using Large Language Models}}},
  doi          = {{10.1007/978-3-031-35320-8_9}},
  year         = {{2023}},
}

@inproceedings{54608,
  author       = {{Zahera, Hamada Mohamed Abdelsamee and Vitiugin, Fedor and Sherif, Mohamed and Castillo, Carlos and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{SEMANTiCS}},
  keywords     = {{dice kiam ngonga porque sherif zahera}},
  title        = {{{Using Pre-trained Language Models for Abstractive DBpedia Summarization: A Comparative Study}}},
  year         = {{2023}},
}

@inbook{54613,
  author       = {{Hanselle, Jonas Manuel and Hüllermeier, Eyke and Mohr, Felix and Ngonga Ngomo, Axel-Cyrille and Sherif, Mohamed and Tornede, Alexander and Wever, Marcel Dominik}},
  booktitle    = {{On-The-Fly Computing – Individualized IT-services in dynamic markets}},
  editor       = {{Haake, Claus-Jochen and Meyer auf der Heide, Friedhelm and Platzner, Marco and Wachsmuth, Henning and Wehrheim, Heike}},
  keywords     = {{dice ngonga sfb901 sherif}},
  pages        = {{85--104}},
  publisher    = {{Heinz Nixdorf Institut, Universität Paderborn}},
  title        = {{{Configuration and Evaluation}}},
  doi          = {{10.5281/zenodo.8068466}},
  volume       = {{412}},
  year         = {{2023}},
}

@inproceedings{54616,
  author       = {{Becker, Alexander and Ahmed, Abdullah Fathi Ahmed and Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{SEMANTiCS}},
  keywords     = {{ahmed becker dice ngonga sail sherif}},
  title        = {{{COBALT: A Content-Based Similarity Approach for Link Discovery over Geospatial Knowledge Graphs}}},
  year         = {{2023}},
}

@misc{45560,
  author       = {{Ali, Manzoor and Saleem, Muhammad and Moussallem, Diego and Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille}},
  publisher    = {{LibreCat University}},
  title        = {{{RELD: A Knowledge Graph of Relation Extraction Datasets}}},
  doi          = {{10.5281/ZENODO.7429677}},
  year         = {{2023}},
}

@inbook{45884,
  author       = {{Hanselle, Jonas Manuel and Hüllermeier, Eyke and Mohr, Felix and Ngonga Ngomo, Axel-Cyrille and Sherif, Mohamed and Tornede, Alexander and Wever, Marcel Dominik}},
  booktitle    = {{On-The-Fly Computing -- Individualized IT-services in dynamic markets}},
  editor       = {{Haake, Claus-Jochen and Meyer auf der Heide, Friedhelm and Platzner, Marco and Wachsmuth, Henning and Wehrheim, Heike}},
  pages        = {{85--104}},
  publisher    = {{Heinz Nixdorf Institut, Universität Paderborn}},
  title        = {{{Configuration and Evaluation}}},
  doi          = {{10.5281/zenodo.8068466}},
  volume       = {{412}},
  year         = {{2023}},
}

@inproceedings{46514,
  abstract     = {{Purpose: Data integration and applications across knowledge graphs (KGs) rely heavily on the discovery of links between resources within these KGs. Geospatial link discovery algorithms have to deal with millions of point sets containing billions of points. 
Methodology: To speed up the discovery of geospatial links, we propose COBALT. COBALT combines the content measures with R-tree indexing. The content measures are based on the area, diagonal and distance of the minimum bounding boxes of the polygons which speeds up the process but is not perfectly accurate. We thus propose two polygon splitting approaches for improving the accuracy of COBALT. 
Findings: Our experiments on real-world datasets show that COBALT is able to speed up the topological relation discovery over geospatial KGs by up to 1.47 × 10^4 times over state-of-the-art linking algorithms while maintaining an F-Measure between 0.7 and 0.9 depending on the relation. Furthermore, we were able to achieve an F-Measure of up to 0.99 by applying our polygon splitting approaches before applying the content measures. 
Value: The process of discovering links between geospatial resources can be significantly faster by sacrificing the optimality of the results. This is especially important for real time data-driven applications such as emergency response, location-based services and traffic management. In future work, additional measures, like the location of polygons or the name of the entity represented by the polygon, could be integrated to further improve the accuracy of the results.}},
  author       = {{Becker, Alexander and Ahmed, Abdullah Fathi Ahmed and Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{SEMANTiCS}},
  keywords     = {{ahmed becker dice ngonga sail sherif}},
  location     = {{Leipzig, Germany}},
  title        = {{{COBALT: A Content-Based Similarity Approach for Link Discovery over Geospatial Knowledge Graphs}}},
  year         = {{2023}},
}

@inproceedings{46518,
  abstract     = {{Purpose: This study addresses the limitations of current short abstracts of DBpedia entities, which often lack a comprehensive overview due to their creating method (i.e., selecting the first two-three sentences from the full DBpedia abstracts).
Methodology: We leverage pre-trained language models to generate abstractive summaries of DBpedia abstracts in six languages (English, French, German, Italian, Spanish, and Dutch). We performed several experiments to assess the quality of generated summaries by language models. In particular, we evaluated the generated summaries using human judgments and automated metrics (Self-ROUGE and BERTScore). Additionally, we studied the correlation between human judgments and automated metrics in evaluating the generated summaries under different aspects: informativeness, coherence, conciseness, and fluency.
Findings: Pre-trained language models generate summaries more concise and informative than existing short abstracts. Specifically, BART-based models effectively overcome the limitations of DBpedia short abstracts, especially for longer ones.
Moreover, we show that BERTScore and ROUGE-1 are reliable metrics for assessing the informativeness and coherence of the generated summaries with respect to the full DBpedia abstracts. We also find a negative correlation between conciseness and human ratings. Furthermore, fluency evaluation remains challenging without human judgment.
Value: This study has significant implications for various applications in machine learning and natural language processing that rely on DBpedia resources. By providing succinct and comprehensive summaries, our approach enhances the quality of DBpedia abstracts and contributes to the semantic web community}},
  author       = {{Zahera, Hamada Mohamed Abdelsamee and Vitiugin, Fedor and Sherif, Mohamed and Castillo, Carlos and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{SEMANTiCS}},
  keywords     = {{dice enexa kiam ngonga porque sherif zahera}},
  location     = {{Leipzig, Germany}},
  title        = {{{Using Pre-trained Language Models for Abstractive DBpedia Summarization: A Comparative Study}}},
  year         = {{2023}},
}

@misc{45558,
  abstract     = {{Graffiti is an urban phenomenon that is increasingly attracting the interest of the sciences. To the best of our knowledge, no suitable data corpora are available for systematic research until now. The Information System Graffiti in Germany project (Ingrid) closes this gap by dealing with graffiti image collections that have been made available to the project for public use. Within Ingrid, the graffiti images are collected, digitized and annotated. With this work, we aim to support the rapid access to a comprehensive data source on Ingrid targeted especially by researchers. In particular, we present IngridKG, an RDF knowledge graph of annotated graffiti, abides by the Linked Data and FAIR principles. We weekly update IngridKG by augmenting the new annotated graffiti to our knowledge graph. Our generation pipeline applies RDF data conversion, link discovery and data fusion approaches to the original data. The current version of IngridKG contains 460,640,154 triples and is linked to 3 other knowledge graphs by over 200,000 links. In our use case studies, we demonstrate the usefulness of our knowledge graph for different applications.}},
  author       = {{Sherif, Mohamed and Morim da Silva, Ana Alexandra and Pestryakova, Svetlana and Ahmed, Abdullah Fathi Ahmed and Niemann, Sven and Ngonga Ngomo, Axel-Cyrille}},
  publisher    = {{LibreCat University}},
  title        = {{{IngridKG: A FAIR Knowledge Graph of Graffiti}}},
  doi          = {{10.5281/ZENODO.7560242}},
  year         = {{2023}},
}

@inbook{54588,
  author       = {{Manzoor, Ali and Saleem, Muhammad and Moussallem, Diego and Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{The Semantic Web}},
  isbn         = {{9783031334542}},
  issn         = {{0302-9743}},
  publisher    = {{Springer Nature Switzerland}},
  title        = {{{RELD: A Knowledge Graph of Relation Extraction Datasets}}},
  doi          = {{10.1007/978-3-031-33455-9_20}},
  year         = {{2023}},
}

@inproceedings{31806,
  abstract     = {{The creation of an RDF knowledge graph for a particular application commonly involves a pipeline of tools that transform a set ofinput data sources into an RDF knowledge graph in a process called dataset augmentation. The components of such augmentation pipelines often require extensive configuration to lead to satisfactory results. Thus, non-experts are often unable to use them. Wepresent an efficient supervised algorithm based on genetic programming for learning knowledge graph augmentation pipelines of arbitrary length. Our approach uses multi-expression learning to learn augmentation pipelines able to achieve a high F-measure on the training data. Our evaluation suggests that our approach can efficiently learn a larger class of RDF dataset augmentation tasks than the state of the art while using only a single training example. Even on the most complex augmentation problem we posed, our approach consistently achieves an average F1-measure of 99% in under 500 iterations with an average runtime of 16 seconds}},
  author       = {{Dreßler, Kevin and Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{Proceedings of the 33rd ACM Conference on Hypertext and Hypermedia}},
  keywords     = {{2022 RAKI SFB901 deer dice kevin knowgraphs limes ngonga sherif simba}},
  location     = {{Barcelona (Spain)}},
  title        = {{{ADAGIO - Automated Data Augmentation of Knowledge Graphs Using Multi-expression Learning}}},
  doi          = {{10.1145/3511095.3531287}},
  year         = {{2022}},
}

@article{29851,
  author       = {{Pestryakova, Svetlana and Vollmers, Daniel and Sherif, Mohamed and Heindorf, Stefan and Saleem, Muhammad and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille}},
  journal      = {{Scientific Data}},
  title        = {{{CovidPubGraph: A FAIR Knowledge Graph of COVID-19 Publications}}},
  doi          = {{10.1038/s41597-022-01298-2}},
  year         = {{2022}},
}

@inproceedings{46538,
  author       = {{Zahera, Hamada Mohamed Abdelsamee and Vollmers, Daniel and Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{ISWC 2022}},
  isbn         = {{978-3-031-19432-0}},
  keywords     = {{colide dice eml4u ngonga raki sherif speaker vollmers zahera}},
  publisher    = {{Springer, Cham}},
  title        = {{{MultPAX: Keyphrase Extraction using Language Models and Knowledge Graphs}}},
  doi          = {{10.1007/978-3-031-19433-7_18}},
  year         = {{2022}},
}

@article{29004,
  abstract     = {{The Linked Data paradigm builds upon the backbone of distributed knowledge bases connected by typed links. The mere volume of current knowledge bases as well as their sheer number pose two major challenges when aiming to support the computation of links across and within them. The first is that tools for link discovery have to be time-efficient when they compute links. Secondly, these tools have to produce links of high quality to serve the applications built upon Linked Data well. Solutions to the second problem build upon efficient computational approaches developed to solve the first and combine these with dedicated machine learning techniques. The current version of the LIMES framework is the product of seven years of research on these two challenges. A series of machine learning techniques and efficient computation approaches were developed and integrated into this framework to address the link discovery problem. The framework combines these diverse algorithms within a generic and extensible architecture. In this article, we give an overview of version 1.7.4 of the open-source release of the framework. In particular, we focus on an overview of the architecture of the framework, an intuition of its inner workings and a brief overview of the approaches it contains. Some descriptions of the applications within which the framework was used complete the paper. Our framework is open-source and available under a GNU license at https: //github.com/dice-group/LIMES together with a user manual and a developer manual.}},
  author       = {{Ngonga Ngomo, Axel-Cyrille and Sherif, Mohamed and Georgala, Kleanthi and Hassan, Mofeed and Dreßler, Kevin and Lyko, Klaus and Obraczka, Daniel and Soru, Tommaso}},
  journal      = {{KI - Künstliche Intelligenz, German Journal of Artificial Intelligence - Organ des Fachbereichs "Künstliche Intelligenz" der Gesellschaft für Informatik e.V.}},
  keywords     = {{2021 dice simba sherif limes ngonga knowgraphs sys:relevantFor:limboproject limboproject sys:relevantFor:infai sys:relevantFor:bis limbo opal georgala kevin slipo sage}},
  publisher    = {{Springer}},
  title        = {{{LIMES - A Framework for Link Discovery on the Semantic Web}}},
  doi          = {{10.1007/s13218-021-00713-x}},
  year         = {{2021}},
}

