% Journal article (Data and Knowledge Engineering, vol. 161, 2026): ELEVATE-ID.
% The DOI is stored bare (no resolver prefix) and the ampersand in the journal
% name is escaped so the entry compiles under LaTeX.
@article{60990,
  author       = {Gusmita, Ria Hari and Firmansyah, Asep Fajar and Zahera, Hamada Mohamed Abdelsamee and Ngonga Ngomo, Axel-Cyrille},
  title        = {{ELEVATE-ID}: Extending Large Language Models for End-to-End Entity Linking Evaluation in {Indonesian}},
  journal      = {Data \& Knowledge Engineering},
  volume       = {161},
  pages        = {102504},
  year         = {2026},
  issn         = {0169-023X},
  doi          = {10.1016/j.datak.2025.102504},
  keywords     = {LLMs, Evaluation, End-to-end EL, Indonesian},
  abstract     = {Large Language Models (LLMs) have demonstrated remarkable performance across a wide range of natural language processing tasks. However, their effectiveness in low-resource languages remains underexplored, particularly in complex tasks such as end-to-end Entity Linking (EL), which requires both mention detection and disambiguation against a knowledge base (KB). In earlier work, we introduced IndEL — the first end-to-end EL benchmark dataset for the Indonesian language — covering both a general domain (news) and a specific domain (religious text from the Indonesian translation of the Quran), and evaluated four traditional end-to-end EL systems on this dataset. In this study, we propose ELEVATE-ID, a comprehensive evaluation framework for assessing LLM performance on end-to-end EL in Indonesian. The framework evaluates LLMs under both zero-shot and fine-tuned conditions, using multilingual and Indonesian monolingual models, with Wikidata as the target KB. Our experiments include performance benchmarking, generalization analysis across domains, and systematic error analysis. Results show that GPT-4 and GPT-3.5 achieve the highest accuracy in zero-shot and fine-tuned settings, respectively. However, even fine-tuned GPT-3.5 underperforms compared to DBpedia Spotlight — the weakest of the traditional model baselines — in the general domain. Interestingly, GPT-3.5 outperforms Babelfy in the specific domain. Generalization analysis indicates that fine-tuned GPT-3.5 adapts more effectively to cross-domain and mixed-domain scenarios. Error analysis uncovers persistent challenges that hinder LLM performance: difficulties with non-complete mentions, acronym disambiguation, and full-name recognition in formal contexts. These issues point to limitations in mention boundary detection and contextual grounding. Indonesian-pretrained LLMs, Komodo and Merak, reveal core weaknesses: template leakage and entity hallucination, respectively—underscoring architectural and training limitations in low-resource end-to-end EL. Code and dataset are available at https://github.com/dice-group/ELEVATE-ID.},
}

% Conference paper (Knowledge Capture Conference 2025, ACM): NL2LS.
% NOTE(review): entry 62119 carries the same title and author list, so this is
% likely a duplicate record of the same paper; consider merging the two keys.
@inproceedings{63027,
  author       = {Ihtassine, Reda and Firmansyah, Asep Fajar and Srivastava, Nikit and Ali, Manzoor and Ngonga Ngomo, Axel-Cyrille and Sherif, Mohamed Ahmed},
  title        = {{NL2LS}: {LLM}-based Automatic Linking of Knowledge Graphs},
  booktitle    = {Proceedings of the Knowledge Capture Conference 2025},
  publisher    = {ACM},
  year         = {2025},
  doi          = {10.1145/3731443.3771374},
}

% Conference paper (ESWC 2025, Springer): ANTS.
% The keywords field holds free-form tags and is reproduced unchanged.
@inproceedings{59054,
  author       = {Firmansyah, Asep Fajar and Zahera, Hamada Mohamed Abdelsamee and Sherif, Mohamed and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  title        = {{ANTS}: Abstractive Entity Summarization in Knowledge Graphs},
  booktitle    = {ESWC2025},
  pages        = {133--151},
  publisher    = {Springer Nature Switzerland},
  year         = {2025},
  isbn         = {978-3-031-94575-5},
  doi          = {10.1007/978-3-031-94575-5_8},
  keywords     = {firmansyah mousallem ngonga sherif zahera},
}

% Conference paper (K-CAP 2025, ACM): NL2LS.
% NOTE(review): entry 63027 carries the same title and author list, so this is
% likely a duplicate record of the same paper; the DOI below is taken from that
% entry. The proceedings ordinal is aligned with the "Thirteenth" wording that
% already appears in the same booktitle -- confirm against the publisher record.
@inproceedings{62119,
  author       = {Ihtassine, Reda and Firmansyah, Asep Fajar and Srivastava, Nikit and Ali, Manzoor and Ngonga Ngomo, Axel-Cyrille and Sherif, Mohamed},
  title        = {{NL2LS}: {LLM}-based Automatic Linking of Knowledge Graphs},
  booktitle    = {Proceedings of the 13th Knowledge Capture Conference 2025, {K-CAP} 2025, The Thirteenth International Conference on Knowledge Capture, December 10--12, 2025, Dayton, Ohio, USA},
  publisher    = {ACM},
  year         = {2025},
  doi          = {10.1145/3731443.3771374},
  keywords     = {Srivastava ali dice enexa firmansyah ihtassine ngonga sailproject sherif whale},
}

% Book chapter in "The Semantic Web" (Springer Nature Switzerland, 2024): ESLM.
% Only the acronym in the title is brace-protected so styles can apply their
% own casing to the remaining words.
@inbook{54412,
  author       = {Firmansyah, Asep Fajar and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  title        = {{ESLM}: Improving Entity Summarization by Leveraging Language Models},
  booktitle    = {The Semantic Web},
  publisher    = {Springer Nature Switzerland},
  year         = {2024},
  isbn         = {9783031606250},
  issn         = {0302-9743},
  doi          = {10.1007/978-3-031-60626-7_9},
}

% Book chapter in "Natural Language Processing and Information Systems"
% (Springer Nature Switzerland, 2023): NMV-LS, verbalization of link specifications.
@inbook{46516,
  author       = {Ahmed, Abdullah Fathi Ahmed and Firmansyah, Asep Fajar and Sherif, Mohamed and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  title        = {Explainable Integration of Knowledge Graphs Using Large Language Models},
  booktitle    = {Natural Language Processing and Information Systems},
  publisher    = {Springer Nature Switzerland},
  year         = {2023},
  isbn         = {9783031353192},
  issn         = {0302-9743},
  doi          = {10.1007/978-3-031-35320-8_9},
  abstract     = {Linked knowledge graphs build the backbone of many data-driven applications such as search engines, conversational agents and e-commerce solutions. Declarative link discovery frameworks use complex link specifications to express the conditions under which a link between two resources can be deemed to exist. However, understanding such complex link specifications is a challenging task for non-expert users of link discovery frameworks. In this paper, we address this drawback by devising NMV-LS, a language model-based verbalization approach for translating complex link specifications into natural language. NMV-LS relies on the results of rule-based link specification verbalization to apply continuous training on T5, a large language model based on the Transformer architecture. We evaluated NMV-LS on English and German datasets using well-known machine translation metrics such as BLUE, METEOR, ChrF++ and TER. Our results suggest that our approach achieves a verbalization performance close to that of humans and outperforms state of the art approaches. Our source code and datasets are publicly available at https://github.com/dice-group/NMV-LS.},
}

% Book chapter in "Natural Language Processing and Information Systems"
% (Springer Nature Switzerland, 2023): IndQNER benchmark dataset.
% NOTE(review): "location" holds "Derby, UK", presumably the conference venue
% rather than the publisher's city -- confirm which one is intended.
@inbook{46572,
  author       = {Gusmita, Ria Hari and Firmansyah, Asep Fajar and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  title        = {{IndQNER}: Named Entity Recognition Benchmark Dataset from the {Indonesian} Translation of the {Quran}},
  booktitle    = {Natural Language Processing and Information Systems},
  publisher    = {Springer Nature Switzerland},
  location     = {Derby, UK},
  year         = {2023},
  isbn         = {9783031353192},
  issn         = {0302-9743},
  doi          = {10.1007/978-3-031-35320-8_12},
  keywords     = {NER benchmark dataset, Indonesian, specific domain},
  abstract     = {Indonesian is classified as underrepresented in the Natural Language Processing (NLP) field, despite being the tenth most spoken language in the world with 198 million speakers. The paucity of datasets is recognized as the main reason for the slow advancements in NLP research for underrepresented languages. Significant attempts were made in 2020 to address this drawback for Indonesian. The Indonesian Natural Language Understanding (IndoNLU) benchmark was introduced alongside IndoBERT pre-trained language model. The second benchmark, Indonesian Language Evaluation Montage (IndoLEM), was presented in the same year. These benchmarks support several tasks, including Named Entity Recognition (NER). However, all NER datasets are in the public domain and do not contain domain-specific datasets. To alleviate this drawback, we introduce IndQNER, a manually annotated NER benchmark dataset in the religious domain that adheres to a meticulously designed annotation guideline. Since Indonesia has the world’s largest Muslim population, we build the dataset from the Indonesian translation of the Quran. The dataset includes 2475 named entities representing 18 different classes. To assess the annotation quality of IndQNER, we perform experiments with BiLSTM and CRF-based NER, as well as IndoBERT fine-tuning. The results reveal that the first model outperforms the second model achieving 0.98 F1 points. This outcome indicates that IndQNER may be an acceptable evaluation metric for Indonesian NER tasks in the aforementioned domain, widening the research’s domain range.},
}

% Conference paper (11th Knowledge Capture Conference, ACM, 2021): GATES.
% The page range uses the canonical double hyphen instead of a Unicode en dash.
@inproceedings{29486,
  author       = {Firmansyah, Asep Fajar and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  title        = {{GATES}: Using Graph Attention Networks for Entity Summarization},
  booktitle    = {Proceedings of the 11th on Knowledge Capture Conference},
  pages        = {73--80},
  publisher    = {ACM},
  year         = {2021},
  isbn         = {978-1-4503-8457-5},
  doi          = {10.1145/3460210.3493574},
}

