[{"keyword":["LLMs","Evaluation","End-to-end EL","Indonesian"],"language":[{"iso":"eng"}],"publication":"Data & Knowledge Engineering","abstract":[{"text":"Large Language Models (LLMs) have demonstrated remarkable performance across a wide range of natural language processing tasks. However, their effectiveness in low-resource languages remains underexplored, particularly in complex tasks such as end-to-end Entity Linking (EL), which requires both mention detection and disambiguation against a knowledge base (KB). In earlier work, we introduced IndEL — the first end-to-end EL benchmark dataset for the Indonesian language — covering both a general domain (news) and a specific domain (religious text from the Indonesian translation of the Quran), and evaluated four traditional end-to-end EL systems on this dataset. In this study, we propose ELEVATE-ID, a comprehensive evaluation framework for assessing LLM performance on end-to-end EL in Indonesian. The framework evaluates LLMs under both zero-shot and fine-tuned conditions, using multilingual and Indonesian monolingual models, with Wikidata as the target KB. Our experiments include performance benchmarking, generalization analysis across domains, and systematic error analysis. Results show that GPT-4 and GPT-3.5 achieve the highest accuracy in zero-shot and fine-tuned settings, respectively. However, even fine-tuned GPT-3.5 underperforms compared to DBpedia Spotlight — the weakest of the traditional model baselines — in the general domain. Interestingly, GPT-3.5 outperforms Babelfy in the specific domain. Generalization analysis indicates that fine-tuned GPT-3.5 adapts more effectively to cross-domain and mixed-domain scenarios. Error analysis uncovers persistent challenges that hinder LLM performance: difficulties with non-complete mentions, acronym disambiguation, and full-name recognition in formal contexts. These issues point to limitations in mention boundary detection and contextual grounding. 
Indonesian-pretrained LLMs, Komodo and Merak, reveal core weaknesses: template leakage and entity hallucination, respectively—underscoring architectural and training limitations in low-resource end-to-end EL. Code and dataset are available at https://github.com/dice-group/ELEVATE-ID.","lang":"eng"}],"date_created":"2025-08-24T11:38:51Z","title":"ELEVATE-ID: Extending Large Language Models for End-to-End Entity Linking Evaluation in Indonesian","year":"2026","_id":"60990","department":[{"_id":"574"}],"user_id":"71039","article_type":"original","type":"journal_article","status":"public","date_updated":"2025-08-25T09:40:13Z","volume":161,"author":[{"id":"71039","full_name":"Gusmita, Ria Hari","last_name":"Gusmita","first_name":"Ria Hari"},{"last_name":"Firmansyah","id":"76787","full_name":"Firmansyah, Asep Fajar","first_name":"Asep Fajar"},{"last_name":"Zahera","orcid":"0000-0003-0215-1278","id":"72768","full_name":"Zahera, Hamada Mohamed Abdelsamee","first_name":"Hamada Mohamed Abdelsamee"},{"first_name":"Axel-Cyrille","last_name":"Ngonga Ngomo","id":"65716","full_name":"Ngonga Ngomo, Axel-Cyrille"}],"doi":"https://doi.org/10.1016/j.datak.2025.102504","main_file_link":[{"url":"https://www.sciencedirect.com/science/article/pii/S0169023X25000990?utm_campaign=STMJ_220042_AUTH_SERV_PA&utm_medium=email&utm_acid=78351008&SIS_ID=&dgcid=STMJ_220042_AUTH_SERV_PA&CMX_ID=&utm_in=DM591673&utm_source=AC_"}],"publication_identifier":{"issn":["0169-023X"]},"intvolume":"       161","page":"102504","citation":{"apa":"Gusmita, R. H., Firmansyah, A. F., Zahera, H. M. A., &#38; Ngonga Ngomo, A.-C. (2026). ELEVATE-ID: Extending Large Language Models for End-to-End Entity Linking Evaluation in Indonesian. <i>Data &#38; Knowledge Engineering</i>, <i>161</i>, 102504. 
<a href=\"https://doi.org/10.1016/j.datak.2025.102504\">https://doi.org/10.1016/j.datak.2025.102504</a>","bibtex":"@article{Gusmita_Firmansyah_Zahera_Ngonga Ngomo_2026, title={ELEVATE-ID: Extending Large Language Models for End-to-End Entity Linking Evaluation in Indonesian}, volume={161}, DOI={<a href=\"https://doi.org/10.1016/j.datak.2025.102504\">https://doi.org/10.1016/j.datak.2025.102504</a>}, journal={Data &#38; Knowledge Engineering}, author={Gusmita, Ria Hari and Firmansyah, Asep Fajar and Zahera, Hamada Mohamed Abdelsamee and Ngonga Ngomo, Axel-Cyrille}, year={2026}, pages={102504} }","short":"R.H. Gusmita, A.F. Firmansyah, H.M.A. Zahera, A.-C. Ngonga Ngomo, Data &#38; Knowledge Engineering 161 (2026) 102504.","mla":"Gusmita, Ria Hari, et al. “ELEVATE-ID: Extending Large Language Models for End-to-End Entity Linking Evaluation in Indonesian.” <i>Data &#38; Knowledge Engineering</i>, vol. 161, 2026, p. 102504, doi:<a href=\"https://doi.org/10.1016/j.datak.2025.102504\">https://doi.org/10.1016/j.datak.2025.102504</a>.","ama":"Gusmita RH, Firmansyah AF, Zahera HMA, Ngonga Ngomo A-C. ELEVATE-ID: Extending Large Language Models for End-to-End Entity Linking Evaluation in Indonesian. <i>Data &#38; Knowledge Engineering</i>. 2026;161:102504. doi:<a href=\"https://doi.org/10.1016/j.datak.2025.102504\">https://doi.org/10.1016/j.datak.2025.102504</a>","ieee":"R. H. Gusmita, A. F. Firmansyah, H. M. A. Zahera, and A.-C. Ngonga Ngomo, “ELEVATE-ID: Extending Large Language Models for End-to-End Entity Linking Evaluation in Indonesian,” <i>Data &#38; Knowledge Engineering</i>, vol. 161, p. 102504, 2026, doi: <a href=\"https://doi.org/10.1016/j.datak.2025.102504\">https://doi.org/10.1016/j.datak.2025.102504</a>.","chicago":"Gusmita, Ria Hari, Asep Fajar Firmansyah, Hamada Mohamed Abdelsamee Zahera, and Axel-Cyrille Ngonga Ngomo. 
“ELEVATE-ID: Extending Large Language Models for End-to-End Entity Linking Evaluation in Indonesian.” <i>Data &#38; Knowledge Engineering</i> 161 (2026): 102504. <a href=\"https://doi.org/10.1016/j.datak.2025.102504\">https://doi.org/10.1016/j.datak.2025.102504</a>."}},{"ddc":["004"],"keyword":["Language model fairness","Aspect phrase classification","Grade prediction","Physician reviews"],"language":[{"iso":"eng"}],"abstract":[{"lang":"eng","text":"In this study, we evaluate the impact of gender-biased data from German-language physician reviews on the fairness of fine-tuned language models. For two different downstream tasks, we use data reported to be gender biased and aggregate it with annotations. First, we propose a new approach to aspect-based sentiment analysis that allows identifying, extracting, and classifying implicit and explicit aspect phrases and their polarity within a single model. The second task we present is grade prediction, where we predict the overall grade of a review on the basis of the review text. For both tasks, we train numerous transformer models and evaluate their performance. The aggregation of sensitive attributes, such as a physician’s gender and migration background, with individual text reviews allows us to measure the performance of the models with respect to these sensitive groups. These group-wise performance measures act as extrinsic bias measures for our downstream tasks. In addition, we translate several gender-specific templates of the intrinsic bias metrics into the German language and evaluate our fine-tuned models. Based on this set of tasks, fine-tuned models, and intrinsic and extrinsic bias measures, we perform correlation analyses between intrinsic and extrinsic bias measures. In terms of sensitive groups and effect sizes, our bias measure results show different directions. Furthermore, correlations between measures of intrinsic and extrinsic bias can be observed in different directions. 
This leads us to conclude that gender-biased data does not inherently lead to biased models. Other variables, such as template dependency for intrinsic measures and label distribution in the data, must be taken into account as they strongly influence the metric results. Therefore, we suggest that metrics and templates should be chosen according to the given task and the biases to be assessed. "}],"file":[{"content_type":"application/pdf","success":1,"relation":"main_file","date_updated":"2024-04-30T12:34:35Z","date_created":"2024-04-30T12:34:35Z","creator":"jkers","file_size":1381398,"file_name":"Kersting 2023.pdf","file_id":"53802","access_level":"closed"}],"publication":"Data & Knowledge Engineering","title":"Towards comparable ratings: Exploring bias in German physician reviews","publisher":"Elsevier","date_created":"2024-04-30T12:30:56Z","year":"2023","article_type":"original","article_number":"102235","file_date_updated":"2024-04-30T12:34:35Z","funded_apc":"1","project":[{"_id":"1","name":"SFB 901: SFB 901: On-The-Fly Computing - Individualisierte IT-Dienstleistungen in dynamischen Märkten ","grant_number":"160364472"},{"name":"SFB 901 - B: SFB 901 - Project Area B","_id":"3"},{"grant_number":"160364472","_id":"9","name":"SFB 901 - B1: SFB 901 - Parametrisierte Servicespezifikation (Subproject B1)"}],"_id":"53801","user_id":"58701","department":[{"_id":"579"}],"status":"public","type":"journal_article","main_file_link":[{"open_access":"1","url":"https://doi.org/10.1016/j.datak.2023.102235"}],"doi":"10.1016/j.datak.2023.102235","oa":"1","date_updated":"2024-04-30T12:41:14Z","author":[{"last_name":"Kersting","full_name":"Kersting, Joschka","id":"58701","first_name":"Joschka"},{"last_name":"Maoro","full_name":"Maoro, Falk","first_name":"Falk"},{"first_name":"Michaela","full_name":"Geierhos, Michaela","last_name":"Geierhos"}],"volume":148,"citation":{"apa":"Kersting, J., Maoro, F., &#38; Geierhos, M. (2023). 
Towards comparable ratings: Exploring bias in German physician reviews. <i>Data &#38; Knowledge Engineering</i>, <i>148</i>, Article 102235. <a href=\"https://doi.org/10.1016/j.datak.2023.102235\">https://doi.org/10.1016/j.datak.2023.102235</a>","short":"J. Kersting, F. Maoro, M. Geierhos, Data &#38; Knowledge Engineering 148 (2023).","mla":"Kersting, Joschka, et al. “Towards Comparable Ratings: Exploring Bias in German Physician Reviews.” <i>Data &#38; Knowledge Engineering</i>, vol. 148, 102235, Elsevier, 2023, doi:<a href=\"https://doi.org/10.1016/j.datak.2023.102235\">10.1016/j.datak.2023.102235</a>.","bibtex":"@article{Kersting_Maoro_Geierhos_2023, title={Towards comparable ratings: Exploring bias in German physician reviews}, volume={148}, DOI={<a href=\"https://doi.org/10.1016/j.datak.2023.102235\">10.1016/j.datak.2023.102235</a>}, number={102235}, journal={Data &#38; Knowledge Engineering}, publisher={Elsevier}, author={Kersting, Joschka and Maoro, Falk and Geierhos, Michaela}, year={2023} }","ama":"Kersting J, Maoro F, Geierhos M. Towards comparable ratings: Exploring bias in German physician reviews. <i>Data &#38; Knowledge Engineering</i>. 2023;148. doi:<a href=\"https://doi.org/10.1016/j.datak.2023.102235\">10.1016/j.datak.2023.102235</a>","chicago":"Kersting, Joschka, Falk Maoro, and Michaela Geierhos. “Towards Comparable Ratings: Exploring Bias in German Physician Reviews.” <i>Data &#38; Knowledge Engineering</i> 148 (2023). <a href=\"https://doi.org/10.1016/j.datak.2023.102235\">https://doi.org/10.1016/j.datak.2023.102235</a>.","ieee":"J. Kersting, F. Maoro, and M. Geierhos, “Towards comparable ratings: Exploring bias in German physician reviews,” <i>Data &#38; Knowledge Engineering</i>, vol. 148, Art. no. 
102235, 2023, doi: <a href=\"https://doi.org/10.1016/j.datak.2023.102235\">10.1016/j.datak.2023.102235</a>."},"intvolume":"       148","publication_status":"published","has_accepted_license":"1","publication_identifier":{"issn":["0169-023X"]}},{"abstract":[{"text":"The number and size of datasets abiding by the Linked Data paradigm increase every day. Discovering links between these datasets is thus central to achieving the vision behind the Data Web. Declarative Link Discovery (LD) frameworks rely on complex Link Specification (LS) to express the conditions under which two resources should be linked. Understanding such LS is not a trivial task for non-expert users. Particularly when such users are interested in generating LS to match their needs. Even if the user applies a machine learning algorithm for the automatic generation of the required LS, the challenge of explaining the resultant LS persists. Hence, providing explainable LS is the key challenge to enable users who are unfamiliar with underlying LS technologies to use them effectively and efficiently. In this paper, we extend our previous work (Ahmed et al., 2019) by proposing a generic multilingual approach that allows verbalization of LS in many languages, i.e., converts LS into understandable natural language text. In this work, we ported our LS verbalization framework into German and Spanish, in addition to English language. Our adequacy and fluency evaluations show that our approach can generate complete and easily understandable natural language descriptions even by lay users. Moreover, we devised an experimental neural approach for improving the quality of our generated texts. 
Our neural approach achieves promising results in terms of BLEU, METEOR and chrF++.","lang":"eng"}],"status":"public","type":"journal_article","publication":"Data & Knowledge Engineering","keyword":["2021 sys:relevantFor:infai simba sherif ngonga ahmed limes dice raki moussallem libo opal knowgraphs"],"language":[{"iso":"eng"}],"_id":"29005","user_id":"67234","year":"2021","citation":{"bibtex":"@article{Fathi Ahmed_Sherif_Moussallem_Ngonga Ngomo_2021, title={Multilingual Verbalization and Summarization for Explainable Link Discovery}, DOI={<a href=\"https://doi.org/10.1016/j.datak.2021.101874\">https://doi.org/10.1016/j.datak.2021.101874</a>}, journal={Data &#38; Knowledge Engineering}, author={Fathi Ahmed, Abdullah and Sherif, Mohamed and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille}, year={2021}, pages={101874} }","short":"A. Fathi Ahmed, M. Sherif, D. Moussallem, A.-C. Ngonga Ngomo, Data &#38; Knowledge Engineering (2021) 101874.","mla":"Fathi Ahmed, Abdullah, et al. “Multilingual Verbalization and Summarization for Explainable Link Discovery.” <i>Data &#38; Knowledge Engineering</i>, 2021, p. 101874, doi:<a href=\"https://doi.org/10.1016/j.datak.2021.101874\">https://doi.org/10.1016/j.datak.2021.101874</a>.","apa":"Fathi Ahmed, A., Sherif, M., Moussallem, D., &#38; Ngonga Ngomo, A.-C. (2021). Multilingual Verbalization and Summarization for Explainable Link Discovery. <i>Data &#38; Knowledge Engineering</i>, 101874. <a href=\"https://doi.org/10.1016/j.datak.2021.101874\">https://doi.org/10.1016/j.datak.2021.101874</a>","ama":"Fathi Ahmed A, Sherif M, Moussallem D, Ngonga Ngomo A-C. Multilingual Verbalization and Summarization for Explainable Link Discovery. <i>Data &#38; Knowledge Engineering</i>. Published online 2021:101874. doi:<a href=\"https://doi.org/10.1016/j.datak.2021.101874\">https://doi.org/10.1016/j.datak.2021.101874</a>","ieee":"A. Fathi Ahmed, M. Sherif, D. Moussallem, and A.-C. 
Ngonga Ngomo, “Multilingual Verbalization and Summarization for Explainable Link Discovery,” <i>Data &#38; Knowledge Engineering</i>, p. 101874, 2021, doi: <a href=\"https://doi.org/10.1016/j.datak.2021.101874\">https://doi.org/10.1016/j.datak.2021.101874</a>.","chicago":"Fathi Ahmed, Abdullah, Mohamed Sherif, Diego Moussallem, and Axel-Cyrille Ngonga Ngomo. “Multilingual Verbalization and Summarization for Explainable Link Discovery.” <i>Data &#38; Knowledge Engineering</i>, 2021, 101874. <a href=\"https://doi.org/10.1016/j.datak.2021.101874\">https://doi.org/10.1016/j.datak.2021.101874</a>."},"page":"101874","publication_identifier":{"issn":["0169-023X"]},"title":"Multilingual Verbalization and Summarization for Explainable Link Discovery","doi":"https://doi.org/10.1016/j.datak.2021.101874","date_updated":"2023-08-16T10:26:16Z","date_created":"2021-12-17T09:51:15Z","author":[{"full_name":"Fathi Ahmed, Abdullah","last_name":"Fathi Ahmed","first_name":"Abdullah"},{"first_name":"Mohamed","orcid":"https://orcid.org/0000-0002-9927-2203","last_name":"Sherif","id":"67234","full_name":"Sherif, Mohamed"},{"first_name":"Diego","last_name":"Moussallem","full_name":"Moussallem, Diego","id":"71635"},{"first_name":"Axel-Cyrille","last_name":"Ngonga Ngomo","full_name":"Ngonga Ngomo, Axel-Cyrille","id":"65716"}]}]