@inproceedings{57324,
  abstract     = {{Generating SPARQL queries is crucial for extracting relevant information from diverse knowledge graphs. However, the structural and semantic differences among these graphs necessitate training or fine-tuning a tailored model for each one. In this paper, we propose UniQ-Gen, a unified query generation approach to generate SPARQL queries across various knowledge graphs. UniQ-Gen integrates entity recognition, disambiguation, and linking through a BERT-NER model and employs cross-encoder ranking to align questions with the Freebase ontology. We conducted several experiments on different benchmark datasets such as LC-QuAD 2.0, GrailQA, and QALD-10. The evaluation results demonstrate that our approach achieves performance equivalent to or better than models fine-tuned for individual knowledge graphs. This finding suggests that fine-tuning a unified model on a heterogeneous dataset of SPARQL queries across different knowledge graphs eliminates the need for separate models for each graph, thereby reducing resource requirements.}},
  author       = {Vollmers, Daniel and Srivastava, Nikit and Zahera, Hamada Mohamed Abdelsamee and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  booktitle    = {{Knowledge Engineering and Knowledge Management}},
  editor       = {Alam, Mehwish and Rospocher, Marco and van Erp, Marieke and Hollink, Laura and Gesese, Genet Asefa},
  isbn         = {{978-3-031-77792-9}},
  pages        = {{174--189}},
  publisher    = {{Springer Nature Switzerland}},
  title        = {{{UniQ-Gen: Unified Query Generation Across Multiple Knowledge Graphs}}},
  doi          = {{10.1007/978-3-031-77792-9_11}},
  year         = {{2025}},
}

@inproceedings{59054,
  author       = {Firmansyah, Asep Fajar and Zahera, Hamada Mohamed Abdelsamee and Sherif, Mohamed and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  booktitle    = {{The Semantic Web}},
  isbn         = {{978-3-031-94575-5}},
  keywords     = {{firmansyah mousallem ngonga sherif zahera}},
  pages        = {{133--151}},
  publisher    = {{Springer Nature Switzerland}},
  title        = {{{ANTS: Abstractive Entity Summarization in Knowledge Graphs}}},
  doi          = {{10.1007/978-3-031-94575-5_8}},
  year         = {{2025}},
}

@article{61134,
  author       = {Manzoor, Ali and Speck, René and Zahera, Hamada Mohamed Abdelsamee and Saleem, Muhammad and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  issn         = {{2169-3536}},
  journal      = {{IEEE Access}},
  pages        = {{1--1}},
  publisher    = {{Institute of Electrical and Electronics Engineers (IEEE)}},
  title        = {{{Multilingual Relation Extraction - A Survey}}},
  doi          = {{10.1109/access.2025.3604258}},
  year         = {{2025}},
}

@inproceedings{61753,
  abstract     = {{This paper presents LOLA, a massively multilingual large language model trained on more than 160 languages using a sparse Mixture-of-Experts Transformer architecture. Our architectural and implementation choices address the challenge of harnessing linguistic diversity while maintaining efficiency and avoiding the common pitfalls of multilinguality. Our analysis of the evaluation results shows competitive performance in natural language generation and understanding tasks. Additionally, we demonstrate how the learned expert-routing mechanism exploits implicit phylogenetic linguistic patterns to potentially alleviate the curse of multilinguality. We provide an in-depth look at the training process, an analysis of the datasets, and a balanced exploration of the model's strengths and limitations. As an open-source model, LOLA promotes reproducibility and serves as a robust foundation for future research. Our findings enable the development of compute-efficient multilingual models with strong, scalable performance across languages.}},
  author       = {Srivastava, Nikit and Kuchelev, Denis and Moteu Ngoli, Tatiana and Shetty, Kshitij and Röder, Michael and Zahera, Hamada Mohamed Abdelsamee and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  booktitle    = {{Proceedings of the 31st International Conference on Computational Linguistics}},
  editor       = {Rambow, Owen and Wanner, Leo and Apidianaki, Marianna and Al-Khalifa, Hend and Eugenio, Barbara Di and Schockaert, Steven},
  pages        = {{6420--6446}},
  publisher    = {{Association for Computational Linguistics}},
  title        = {{{LOLA – An Open-Source Massively Multilingual Large Language Model}}},
  year         = {{2025}},
}

@inbook{54412,
  author       = {Firmansyah, Asep Fajar and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  booktitle    = {{The Semantic Web}},
  isbn         = {{9783031606250}},
  issn         = {{0302-9743}},
  publisher    = {{Springer Nature Switzerland}},
  title        = {{{ESLM: Improving Entity Summarization by Leveraging Language Models}}},
  doi          = {{10.1007/978-3-031-60626-7_9}},
  year         = {{2024}},
}

@inproceedings{55095,
  author       = {Manzoor, Ali and Sohail Nisar, Muhammad and Saleem, Muhammad and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  booktitle    = {{Natural Language Processing and Information Systems}},
  keywords     = {{Nisar SFB-TRR Saleem climate_bOWL colide dice enexa kiam manzoor moussallem ngonga}},
  publisher    = {{Springer Nature Switzerland}},
  title        = {{{Enhancing Relation Extraction Through Augmented Data: Large Language Models Unleashed}}},
  year         = {{2024}},
}

@inbook{56214,
  author       = {Li, Jiayi and Satheesh, Sheetal and Heindorf, Stefan and Moussallem, Diego and Speck, René and Ngonga Ngomo, Axel-Cyrille},
  booktitle    = {{Communications in Computer and Information Science}},
  isbn         = {{9783031637865}},
  issn         = {{1865-0929}},
  location     = {{Valletta, Malta}},
  publisher    = {{Springer Nature Switzerland}},
  title        = {{{AutoCL: AutoML for Concept Learning}}},
  doi          = {{10.1007/978-3-031-63787-2_7}},
  year         = {{2024}},
}

@inbook{56581,
  abstract     = {{In recent years, there has been a surge in natural language processing research focused on low-resource languages (LrLs), underscoring the growing recognition that LrLs deserve the same attention as high-resource languages (HrLs). This shift is crucial for ensuring linguistic diversity and inclusivity in the digital age. Despite Indonesian ranking as the 11th most spoken language globally, it remains under-resourced in terms of computational tools and datasets. Within the semantic web domain, Entity Linking (EL) is pivotal, linking textual entity mentions to their corresponding entries in knowledge bases. This process is foundational for advanced information extraction tasks, including relation extraction and event detection. To bolster EL research in Indonesian, we introduce IndEL, the first benchmark dataset tailored for both general and specific domains. IndEL was manually curated using Wikidata, adhering to a rigorous set of annotation guidelines. We used two Named Entity Recognition (NER) benchmark datasets for entity extraction: NER UI for the general domain and IndQNER for the specific domain. IndQNER focused on entities from the Indonesian translation of the Quran. IndEL comprises 4765 entities in the general domain and 2453 in the specific domain. Using the GERBIL framework, we use IndEL to evaluate the performance of various EL systems, such as Babelfy, DBpedia Spotlight, MAG, OpenTapioca, and WAT. Our further investigation reveals that within Wikidata, a significant number of NIL entities remain unlinked due to the limited number of Indonesian labels and the use of acronyms. Especially in the specific domain, transliteration and translation processes performed to create the Indonesian translation of the Quran contribute to the presence of entities in a descriptive form and as synonyms.}},
  author       = {Gusmita, Ria Hari and Abshar, Muhammad Faruq Amiral and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  booktitle    = {{Lecture Notes in Computer Science}},
  isbn         = {{9783031702389}},
  issn         = {{0302-9743}},
  keywords     = {{entity linking benchmark dataset, Indonesian, general and specific domains}},
  location     = {{Turin, Italy}},
  publisher    = {{Springer Nature Switzerland}},
  title        = {{{IndEL: Indonesian Entity Linking Benchmark Dataset for General and Specific Domains}}},
  doi          = {{10.1007/978-3-031-70239-6_34}},
  year         = {{2024}},
}

@unpublished{57277,
  author       = {Srivastava, Nikit and Ma, Mengshi and Vollmers, Daniel and Zahera, Hamada Mohamed Abdelsamee and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  note         = {{Preprint}},
  title        = {{{MST5 -- Multilingual Question Answering over Knowledge Graphs}}},
  year         = {{2024}},
}

@inproceedings{55094,
  author       = {Zahera, Hamada Mohamed Abdelsamee and Manzoor, Ali and Sherif, Mohamed and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  booktitle    = {{SEMANTiCS}},
  keywords     = {{TRR318 climatebowl colide dice enexa kiam manzoor moussallem ngonga sailproject sherif simba zahera}},
  title        = {{{Generating SPARQL from Natural Language Using Chain-of-Thoughts Prompting}}},
  year         = {{2024}},
}

@inproceedings{57278,
  author       = {Morim da Silva, Ana Alexandra and Srivastava, Nikit and Moteu Ngoli, Tatiana and Röder, Michael and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  booktitle    = {{Proceedings of the Seventh Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2024)}},
  publisher    = {{Association for Computational Linguistics}},
  title        = {{{Benchmarking Low-Resource Machine Translation Systems}}},
  doi          = {{10.18653/v1/2024.loresmt-1.18}},
  year         = {{2024}},
}

@inbook{46516,
  abstract     = {{Linked knowledge graphs build the backbone of many data-driven applications such as search engines, conversational agents and e-commerce solutions. Declarative link discovery frameworks use complex link specifications to express the conditions under which a link between two resources can be deemed to exist. However, understanding such complex link specifications is a challenging task for non-expert users of link discovery frameworks. In this paper, we address this drawback by devising NMV-LS, a language model-based verbalization approach for translating complex link specifications into natural language. NMV-LS relies on the results of rule-based link specification verbalization to apply continuous training on T5, a large language model based on the Transformer architecture. We evaluated NMV-LS on English and German datasets using well-known machine translation metrics such as BLEU, METEOR, ChrF++ and TER. Our results suggest that our approach achieves a verbalization performance close to that of humans and outperforms state of the art approaches. Our source code and datasets are publicly available at https://github.com/dice-group/NMV-LS.}},
  author       = {Ahmed, Abdullah Fathi and Firmansyah, Asep Fajar and Sherif, Mohamed and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  booktitle    = {{Natural Language Processing and Information Systems}},
  isbn         = {{9783031353192}},
  issn         = {{0302-9743}},
  publisher    = {{Springer Nature Switzerland}},
  title        = {{{Explainable Integration of Knowledge Graphs Using Large Language Models}}},
  doi          = {{10.1007/978-3-031-35320-8_9}},
  year         = {{2023}},
}

@inproceedings{54614,
  author       = {Srivastava, Nikit and Perevalov, Aleksandr and Kuchelev, Denis and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille and Both, Andreas},
  booktitle    = {{Proceedings of the 12th Knowledge Capture Conference 2023, {K-CAP} 2023, Pensacola, FL, USA, December 5-7, 2023}},
  editor       = {Venable, Kristen Brent and Garijo, Daniel and Jalaian, Brian},
  keywords     = {{dice kuchelev moussallem ngonga srivastava}},
  pages        = {{122--130}},
  publisher    = {{ACM}},
  title        = {{{Lingua Franca - Entity-Aware Machine Translation Approach for Question Answering over Knowledge Graphs}}},
  doi          = {{10.1145/3587259.3627567}},
  year         = {{2023}},
}

@misc{45560,
  author       = {Manzoor, Ali and Saleem, Muhammad and Moussallem, Diego and Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille},
  publisher    = {{Zenodo}},
  title        = {{{RELD: A Knowledge Graph of Relation Extraction Datasets}}},
  doi          = {{10.5281/ZENODO.7429677}},
  year         = {{2023}},
}

@inbook{46572,
  abstract     = {{Indonesian is classified as underrepresented in the Natural Language Processing (NLP) field, despite being the tenth most spoken language in the world with 198 million speakers. The paucity of datasets is recognized as the main reason for the slow advancements in NLP research for underrepresented languages. Significant attempts were made in 2020 to address this drawback for Indonesian. The Indonesian Natural Language Understanding (IndoNLU) benchmark was introduced alongside IndoBERT pre-trained language model. The second benchmark, Indonesian Language Evaluation Montage (IndoLEM), was presented in the same year. These benchmarks support several tasks, including Named Entity Recognition (NER). However, all NER datasets are in the public domain and do not contain domain-specific datasets. To alleviate this drawback, we introduce IndQNER, a manually annotated NER benchmark dataset in the religious domain that adheres to a meticulously designed annotation guideline. Since Indonesia has the world’s largest Muslim population, we build the dataset from the Indonesian translation of the Quran. The dataset includes 2475 named entities representing 18 different classes. To assess the annotation quality of IndQNER, we perform experiments with BiLSTM and CRF-based NER, as well as IndoBERT fine-tuning. The results reveal that the first model outperforms the second model achieving 0.98 F1 points. This outcome indicates that IndQNER may be an acceptable evaluation metric for Indonesian NER tasks in the aforementioned domain, widening the research’s domain range.}},
  author       = {Gusmita, Ria Hari and Firmansyah, Asep Fajar and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  booktitle    = {{Natural Language Processing and Information Systems}},
  isbn         = {{9783031353192}},
  issn         = {{0302-9743}},
  keywords     = {{NER benchmark dataset, Indonesian, specific domain}},
  location     = {{Derby, UK}},
  publisher    = {{Springer Nature Switzerland}},
  title        = {{{IndQNER: Named Entity Recognition Benchmark Dataset from the Indonesian Translation of the Quran}}},
  doi          = {{10.1007/978-3-031-35320-8_12}},
  year         = {{2023}},
}

@inproceedings{57274,
  author       = {Srivastava, Nikit and Perevalov, Aleksandr and Kuchelev, Denis and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille and Both, Andreas},
  booktitle    = {{Proceedings of the 12th Knowledge Capture Conference 2023}},
  internal-note = {{duplicate of entry 54614 (same DOI); consider merging or removing}},
  publisher    = {{ACM}},
  title        = {{{Lingua Franca – Entity-Aware Machine Translation Approach for Question Answering over Knowledge Graphs}}},
  doi          = {{10.1145/3587259.3627567}},
  year         = {{2023}},
}

@inbook{54588,
  author       = {Manzoor, Ali and Saleem, Muhammad and Moussallem, Diego and Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille},
  booktitle    = {{The Semantic Web}},
  isbn         = {{9783031334542}},
  issn         = {{0302-9743}},
  publisher    = {{Springer Nature Switzerland}},
  title        = {{{RELD: A Knowledge Graph of Relation Extraction Datasets}}},
  doi          = {{10.1007/978-3-031-33455-9_20}},
  year         = {{2023}},
}

@article{29851,
  author       = {Pestryakova, Svetlana and Vollmers, Daniel and Sherif, Mohamed and Heindorf, Stefan and Saleem, Muhammad and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  journal      = {{Scientific Data}},
  title        = {{{CovidPubGraph: A FAIR Knowledge Graph of COVID-19 Publications}}},
  doi          = {{10.1038/s41597-022-01298-2}},
  year         = {{2022}},
}

@inproceedings{57285,
  author       = {Mireles, Victor and Revenko, Artem and Srivastava, Nikit and Vollmers, Daniel and Breit, Anna and Moussallem, Diego},
  booktitle    = {{Proceedings of Poster and Demo Track and Workshop Track of the 18th International Conference on Semantic Systems, co-located with 18th International Conference on Semantic Systems (SEMANTiCS 2022)}},
  keywords     = {{dice moussallem porque srivastava vollmers}},
  title        = {{{Proposal for PORQUE, a Polylingual Hybrid Question Answering System}}},
  year         = {{2022}},
}

@article{24721,
  author       = {Ahmed, Abdullah Fathi and Sherif, Mohamed and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  journal      = {{Data and Knowledge Engineering}},
  pages        = {{101874}},
  title        = {{{Multilingual Verbalization and Summarization for Explainable Link Discovery}}},
  doi          = {{10.1016/j.datak.2021.101874}},
  volume       = {{133}},
  year         = {{2021}},
}

