[{"date_created":"2025-08-24T11:38:51Z","title":"ELEVATE-ID: Extending Large Language Models for End-to-End Entity Linking Evaluation in Indonesian","year":"2026","language":[{"iso":"eng"}],"keyword":["LLMs","Evaluation","End-to-end EL","Indonesian"],"publication":"Data & Knowledge Engineering","abstract":[{"lang":"eng","text":"Large Language Models (LLMs) have demonstrated remarkable performance across a wide range of natural language processing tasks. However, their effectiveness in low-resource languages remains underexplored, particularly in complex tasks such as end-to-end Entity Linking (EL), which requires both mention detection and disambiguation against a knowledge base (KB). In earlier work, we introduced IndEL — the first end-to-end EL benchmark dataset for the Indonesian language — covering both a general domain (news) and a specific domain (religious text from the Indonesian translation of the Quran), and evaluated four traditional end-to-end EL systems on this dataset. In this study, we propose ELEVATE-ID, a comprehensive evaluation framework for assessing LLM performance on end-to-end EL in Indonesian. The framework evaluates LLMs under both zero-shot and fine-tuned conditions, using multilingual and Indonesian monolingual models, with Wikidata as the target KB. Our experiments include performance benchmarking, generalization analysis across domains, and systematic error analysis. Results show that GPT-4 and GPT-3.5 achieve the highest accuracy in zero-shot and fine-tuned settings, respectively. However, even fine-tuned GPT-3.5 underperforms compared to DBpedia Spotlight — the weakest of the traditional model baselines — in the general domain. Interestingly, GPT-3.5 outperforms Babelfy in the specific domain. Generalization analysis indicates that fine-tuned GPT-3.5 adapts more effectively to cross-domain and mixed-domain scenarios. 
Error analysis uncovers persistent challenges that hinder LLM performance: difficulties with non-complete mentions, acronym disambiguation, and full-name recognition in formal contexts. These issues point to limitations in mention boundary detection and contextual grounding. Indonesian-pretrained LLMs, Komodo and Merak, reveal core weaknesses: template leakage and entity hallucination, respectively—underscoring architectural and training limitations in low-resource end-to-end EL. Code and dataset are available at https://github.com/dice-group/ELEVATE-ID."}],"author":[{"first_name":"Ria Hari","full_name":"Gusmita, Ria Hari","id":"71039","last_name":"Gusmita"},{"first_name":"Asep Fajar","last_name":"Firmansyah","full_name":"Firmansyah, Asep Fajar","id":"76787"},{"first_name":"Hamada Mohamed Abdelsamee","last_name":"Zahera","orcid":"0000-0003-0215-1278","id":"72768","full_name":"Zahera, Hamada Mohamed Abdelsamee"},{"first_name":"Axel-Cyrille","last_name":"Ngonga Ngomo","full_name":"Ngonga Ngomo, Axel-Cyrille","id":"65716"}],"volume":161,"date_updated":"2025-08-25T09:40:13Z","main_file_link":[{"url":"https://www.sciencedirect.com/science/article/pii/S0169023X25000990?utm_campaign=STMJ_220042_AUTH_SERV_PA&utm_medium=email&utm_acid=78351008&SIS_ID=&dgcid=STMJ_220042_AUTH_SERV_PA&CMX_ID=&utm_in=DM591673&utm_source=AC_"}],"doi":"https://doi.org/10.1016/j.datak.2025.102504","publication_identifier":{"issn":["0169-023X"]},"citation":{"short":"R.H. Gusmita, A.F. Firmansyah, H.M.A. Zahera, A.-C. Ngonga Ngomo, Data &#38; Knowledge Engineering 161 (2026) 102504.","mla":"Gusmita, Ria Hari, et al. “ELEVATE-ID: Extending Large Language Models for End-to-End Entity Linking Evaluation in Indonesian.” <i>Data &#38; Knowledge Engineering</i>, vol. 161, 2026, p. 
102504, doi:<a href=\"https://doi.org/10.1016/j.datak.2025.102504\">https://doi.org/10.1016/j.datak.2025.102504</a>.","bibtex":"@article{Gusmita_Firmansyah_Zahera_Ngonga Ngomo_2026, title={ELEVATE-ID: Extending Large Language Models for End-to-End Entity Linking Evaluation in Indonesian}, volume={161}, DOI={<a href=\"https://doi.org/10.1016/j.datak.2025.102504\">https://doi.org/10.1016/j.datak.2025.102504</a>}, journal={Data &#38; Knowledge Engineering}, author={Gusmita, Ria Hari and Firmansyah, Asep Fajar and Zahera, Hamada Mohamed Abdelsamee and Ngonga Ngomo, Axel-Cyrille}, year={2026}, pages={102504} }","apa":"Gusmita, R. H., Firmansyah, A. F., Zahera, H. M. A., &#38; Ngonga Ngomo, A.-C. (2026). ELEVATE-ID: Extending Large Language Models for End-to-End Entity Linking Evaluation in Indonesian. <i>Data &#38; Knowledge Engineering</i>, <i>161</i>, 102504. <a href=\"https://doi.org/10.1016/j.datak.2025.102504\">https://doi.org/10.1016/j.datak.2025.102504</a>","ama":"Gusmita RH, Firmansyah AF, Zahera HMA, Ngonga Ngomo A-C. ELEVATE-ID: Extending Large Language Models for End-to-End Entity Linking Evaluation in Indonesian. <i>Data &#38; Knowledge Engineering</i>. 2026;161:102504. doi:<a href=\"https://doi.org/10.1016/j.datak.2025.102504\">https://doi.org/10.1016/j.datak.2025.102504</a>","chicago":"Gusmita, Ria Hari, Asep Fajar Firmansyah, Hamada Mohamed Abdelsamee Zahera, and Axel-Cyrille Ngonga Ngomo. “ELEVATE-ID: Extending Large Language Models for End-to-End Entity Linking Evaluation in Indonesian.” <i>Data &#38; Knowledge Engineering</i> 161 (2026): 102504. <a href=\"https://doi.org/10.1016/j.datak.2025.102504\">https://doi.org/10.1016/j.datak.2025.102504</a>.","ieee":"R. H. Gusmita, A. F. Firmansyah, H. M. A. Zahera, and A.-C. Ngonga Ngomo, “ELEVATE-ID: Extending Large Language Models for End-to-End Entity Linking Evaluation in Indonesian,” <i>Data &#38; Knowledge Engineering</i>, vol. 161, p. 
102504, 2026, doi: <a href=\"https://doi.org/10.1016/j.datak.2025.102504\">https://doi.org/10.1016/j.datak.2025.102504</a>."},"page":"102504","intvolume":"       161","user_id":"71039","department":[{"_id":"574"}],"_id":"60990","article_type":"original","type":"journal_article","status":"public"},{"place":"Cham","year":"2024","citation":{"short":"R.H. Gusmita, M.F.A. Abshar, D. Moussallem, A.-C. Ngonga Ngomo, in: Lecture Notes in Computer Science, Springer Nature Switzerland, Cham, 2024.","mla":"Gusmita, Ria Hari, et al. “IndEL: Indonesian Entity Linking Benchmark Dataset for General and Specific Domains.” <i>Lecture Notes in Computer Science</i>, Springer Nature Switzerland, 2024, doi:<a href=\"https://doi.org/10.1007/978-3-031-70239-6_34\">10.1007/978-3-031-70239-6_34</a>.","bibtex":"@inbook{Gusmita_Abshar_Moussallem_Ngonga Ngomo_2024, place={Cham}, title={IndEL: Indonesian Entity Linking Benchmark Dataset for General and Specific Domains}, DOI={<a href=\"https://doi.org/10.1007/978-3-031-70239-6_34\">10.1007/978-3-031-70239-6_34</a>}, booktitle={Lecture Notes in Computer Science}, publisher={Springer Nature Switzerland}, author={Gusmita, Ria Hari and Abshar, Muhammad Faruq Amiral and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille}, year={2024} }","apa":"Gusmita, R. H., Abshar, M. F. A., Moussallem, D., &#38; Ngonga Ngomo, A.-C. (2024). IndEL: Indonesian Entity Linking Benchmark Dataset for General and Specific Domains. In <i>Lecture Notes in Computer Science</i>. The 29th Annual International Conference on Natural Language &#38; Information Systems (NLDB 2024), Turin, Italy. Springer Nature Switzerland. <a href=\"https://doi.org/10.1007/978-3-031-70239-6_34\">https://doi.org/10.1007/978-3-031-70239-6_34</a>","chicago":"Gusmita, Ria Hari, Muhammad Faruq Amiral Abshar, Diego Moussallem, and Axel-Cyrille Ngonga Ngomo. “IndEL: Indonesian Entity Linking Benchmark Dataset for General and Specific Domains.” In <i>Lecture Notes in Computer Science</i>. 
Cham: Springer Nature Switzerland, 2024. <a href=\"https://doi.org/10.1007/978-3-031-70239-6_34\">https://doi.org/10.1007/978-3-031-70239-6_34</a>.","ieee":"R. H. Gusmita, M. F. A. Abshar, D. Moussallem, and A.-C. Ngonga Ngomo, “IndEL: Indonesian Entity Linking Benchmark Dataset for General and Specific Domains,” in <i>Lecture Notes in Computer Science</i>, Cham: Springer Nature Switzerland, 2024.","ama":"Gusmita RH, Abshar MFA, Moussallem D, Ngonga Ngomo A-C. IndEL: Indonesian Entity Linking Benchmark Dataset for General and Specific Domains. In: <i>Lecture Notes in Computer Science</i>. Springer Nature Switzerland; 2024. doi:<a href=\"https://doi.org/10.1007/978-3-031-70239-6_34\">10.1007/978-3-031-70239-6_34</a>"},"publication_identifier":{"issn":["0302-9743","1611-3349"],"isbn":["9783031702389","9783031702396"]},"publication_status":"published","related_material":{"link":[{"relation":"confirmation","url":"https://link.springer.com/chapter/10.1007/978-3-031-70239-6_34"}]},"title":"IndEL: Indonesian Entity Linking Benchmark Dataset for General and Specific Domains","conference":{"end_date":"2024-06-27","location":"Turin, Italy","name":"The 29th Annual International Conference on Natural Language & Information Systems (NLDB 2024)","start_date":"2024-06-25"},"doi":"10.1007/978-3-031-70239-6_34","publisher":"Springer Nature Switzerland","date_updated":"2024-10-14T19:22:16Z","author":[{"first_name":"Ria Hari","last_name":"Gusmita","id":"71039","full_name":"Gusmita, Ria Hari"},{"last_name":"Abshar","full_name":"Abshar, Muhammad Faruq Amiral","first_name":"Muhammad Faruq Amiral"},{"first_name":"Diego","id":"71635","full_name":"Moussallem, Diego","last_name":"Moussallem"},{"first_name":"Axel-Cyrille","id":"65716","full_name":"Ngonga Ngomo, Axel-Cyrille","last_name":"Ngonga Ngomo"}],"date_created":"2024-10-10T14:29:08Z","abstract":[{"text":"In recent years, there has been a surge in natural language processing research focused on low-resource languages (LrLs), 
underscoring the growing recognition that LrLs deserve the same attention as high-resource languages (HrLs). This shift is crucial for ensuring linguistic diversity and inclusivity in the digital age. Despite Indonesian ranking as the 11th most spoken language globally, it remains under-resourced in terms of computational tools and datasets. Within the semantic web domain, Entity Linking (EL) is pivotal, linking textual entity mentions to their corresponding entries in knowledge bases. This process is foundational for advanced information extraction tasks, including relation extraction and event detection. To bolster EL research in Indonesian, we introduce IndEL, the first benchmark dataset tailored for both general and specific domains. IndEL was manually curated using Wikidata, adhering to a rigorous set of annotation guidelines. We used two Named Entity Recognition (NER) benchmark datasets for entity extraction: NER UI for the general domain and IndQNER for the specific domain. IndQNER focused on entities from the Indonesian translation of the Quran. IndEL comprises 4765 entities in the general domain and 2453 in the specific domain. Using the GERBIL framework, we use IndEL to evaluate the performance of various EL systems, such as Babelfy, DBpedia Spotlight, MAG, OpenTapioca, and WAT. Our further investigation reveals that within Wikidata, a significant number of NIL entities remain unlinked due to the limited number of Indonesian labels and the use of acronyms. 
Especially in the specific domain, transliteration and translation processes performed to create the Indonesian translation of the Quran contribute to the presence of entities in a descriptive form and as synonyms.","lang":"eng"}],"status":"public","publication":"Lecture Notes in Computer Science","type":"book_chapter","keyword":["entity linking benchmark dataset","Indonesian","general and specific domains"],"language":[{"iso":"eng"}],"_id":"56581","user_id":"71039"},{"date_updated":"2024-11-19T15:41:34Z","author":[{"id":"71039","full_name":"Gusmita, Ria Hari","last_name":"Gusmita","first_name":"Ria Hari"},{"last_name":"Firmansyah","id":"76787","full_name":"Firmansyah, Asep Fajar","first_name":"Asep Fajar"},{"first_name":"Diego","last_name":"Moussallem","id":"71635","full_name":"Moussallem, Diego"},{"first_name":"Axel-Cyrille","last_name":"Ngonga Ngomo","full_name":"Ngonga Ngomo, Axel-Cyrille","id":"65716"}],"conference":{"end_date":"2023-06-23","location":"Derby, UK","name":"International Conference on Applications of Natural Language to Information Systems (NLDB) 2023","start_date":"2023-06-21"},"doi":"10.1007/978-3-031-35320-8_12","publication_status":"published","publication_identifier":{"isbn":["9783031353192","9783031353208"],"issn":["0302-9743","1611-3349"]},"related_material":{"link":[{"relation":"confirmation","url":"https://link.springer.com/chapter/10.1007/978-3-031-35320-8_12"}]},"place":"Cham","citation":{"apa":"Gusmita, R. H., Firmansyah, A. F., Moussallem, D., &#38; Ngonga Ngomo, A.-C. (2023). IndQNER: Named Entity Recognition Benchmark Dataset from the Indonesian Translation of the Quran. In <i>Natural Language Processing and Information Systems</i>. International Conference on Applications of Natural Language to Information Systems (NLDB) 2023, Derby, UK. Springer Nature Switzerland. 
<a href=\"https://doi.org/10.1007/978-3-031-35320-8_12\">https://doi.org/10.1007/978-3-031-35320-8_12</a>","bibtex":"@inbook{Gusmita_Firmansyah_Moussallem_Ngonga Ngomo_2023, place={Cham}, title={IndQNER: Named Entity Recognition Benchmark Dataset from the Indonesian Translation of the Quran}, DOI={<a href=\"https://doi.org/10.1007/978-3-031-35320-8_12\">10.1007/978-3-031-35320-8_12</a>}, booktitle={Natural Language Processing and Information Systems}, publisher={Springer Nature Switzerland}, author={Gusmita, Ria Hari and Firmansyah, Asep Fajar and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille}, year={2023} }","short":"R.H. Gusmita, A.F. Firmansyah, D. Moussallem, A.-C. Ngonga Ngomo, in: Natural Language Processing and Information Systems, Springer Nature Switzerland, Cham, 2023.","mla":"Gusmita, Ria Hari, et al. “IndQNER: Named Entity Recognition Benchmark Dataset from the Indonesian Translation of the Quran.” <i>Natural Language Processing and Information Systems</i>, Springer Nature Switzerland, 2023, doi:<a href=\"https://doi.org/10.1007/978-3-031-35320-8_12\">10.1007/978-3-031-35320-8_12</a>.","ieee":"R. H. Gusmita, A. F. Firmansyah, D. Moussallem, and A.-C. Ngonga Ngomo, “IndQNER: Named Entity Recognition Benchmark Dataset from the Indonesian Translation of the Quran,” in <i>Natural Language Processing and Information Systems</i>, Cham: Springer Nature Switzerland, 2023.","chicago":"Gusmita, Ria Hari, Asep Fajar Firmansyah, Diego Moussallem, and Axel-Cyrille Ngonga Ngomo. “IndQNER: Named Entity Recognition Benchmark Dataset from the Indonesian Translation of the Quran.” In <i>Natural Language Processing and Information Systems</i>. Cham: Springer Nature Switzerland, 2023. <a href=\"https://doi.org/10.1007/978-3-031-35320-8_12\">https://doi.org/10.1007/978-3-031-35320-8_12</a>.","ama":"Gusmita RH, Firmansyah AF, Moussallem D, Ngonga Ngomo A-C. IndQNER: Named Entity Recognition Benchmark Dataset from the Indonesian Translation of the Quran. 
In: <i>Natural Language Processing and Information Systems</i>. Springer Nature Switzerland; 2023. doi:<a href=\"https://doi.org/10.1007/978-3-031-35320-8_12\">10.1007/978-3-031-35320-8_12</a>"},"_id":"46572","user_id":"71039","department":[{"_id":"34"},{"_id":"574"}],"type":"book_chapter","status":"public","publisher":"Springer Nature Switzerland","date_created":"2023-08-17T12:41:45Z","title":"IndQNER: Named Entity Recognition Benchmark Dataset from the Indonesian Translation of the Quran","year":"2023","keyword":["NER benchmark dataset","Indonesian","specific domain"],"language":[{"iso":"eng"}],"publication":"Natural Language Processing and Information Systems","abstract":[{"lang":"eng","text":"Indonesian is classified as underrepresented in the Natural Language Processing (NLP) field, despite being the tenth most spoken language in the world with 198 million speakers. The paucity of datasets is recognized as the main reason for the slow advancements in NLP research for underrepresented languages. Significant attempts were made in 2020 to address this drawback for Indonesian. The Indonesian Natural Language Understanding (IndoNLU) benchmark was introduced alongside IndoBERT pre-trained language model. The second benchmark, Indonesian Language Evaluation Montage (IndoLEM), was presented in the same year. These benchmarks support several tasks, including Named Entity Recognition (NER). However, all NER datasets are in the public domain and do not contain domain-specific datasets. To alleviate this drawback, we introduce IndQNER, a manually annotated NER benchmark dataset in the religious domain that adheres to a meticulously designed annotation guideline. Since Indonesia has the world’s largest Muslim population, we build the dataset from the Indonesian translation of the Quran. The dataset includes 2475 named entities representing 18 different classes. 
To assess the annotation quality of IndQNER, we perform experiments with BiLSTM and CRF-based NER, as well as IndoBERT fine-tuning. The results reveal that the first model outperforms the second model achieving 0.98 F1 points. This outcome indicates that IndQNER may be an acceptable evaluation metric for Indonesian NER tasks in the aforementioned domain, widening the research’s domain range."}]}]
