@book{meelenfaggionatohill2024,
  author    = {Meelen, Marieke and Faggionato, Christian and Hill, Nathan},
  title     = {Tibetan digital humanities and natural language processing},
  publisher = {Centre National de la Recherche Scientifique (CNRS), Paris},
  year      = {2024},
  note      = {Proceedings of the IATS 2022 panel as a Special Issue of the Revue d'Etudes Tib\'etaines}
}
Ngi Dzardzongke ki choe gongma [My first Dzardzongke book: early reader to learn the language of South Mustang, Nepal]
@book{meelenrambletsewang2024,
  author    = {Meelen, Marieke and Ramble, Charles and Tsewang, Kemi},
  title     = {Ngi Dzardzongke ki choe gongma [My first Dzardzongke book: early reader to learn the language of South Mustang, Nepal]},
  publisher = {},
  year      = {2024}
}
2022
Creating annotated corpora for historical languages
@book{meelenwillis2022,
  author = {Meelen, Marieke and Willis, David},
  title  = {Creating annotated corpora for historical languages},
  volume = {6},
  year   = {2022},
  note   = {Special Issue for Journal of Historical Syntax}
}
2016
Why Jesus and Job spoke bad Welsh: The origin and distribution of V2 orders in Middle Welsh
@book{meelen2016,
  author    = {Meelen, Marieke},
  title     = {Why Jesus and Job spoke bad Welsh: The origin and distribution of V2 orders in Middle Welsh},
  publisher = {LOT Publications},
  school    = {Leiden University},
  year      = {2016}
}
Peer-Reviewed Articles & Chapters
2026
From Large and Complex Manuscript Collections to Searchable eTexts: the Case of PaganTibet
@article{griffithsmeelen2026,
  author  = {Griffiths, Rachael M. and Meelen, Marieke},
  title   = {{From Large and Complex Manuscript Collections to Searchable eTexts: the Case of PaganTibet}},
  journal = {Revue d'Etudes Tib\'etaines},
  volume  = {80},
  year    = {2026}
}
2025
Comparing efficacy of IPA vs Pinyin romanisation transcriptions for complex tonal languages: A case study in Baima
Katia Chirkova, Rolando Coto-Solano, Rachael Griffiths, and Marieke Meelen
In Eight Workshop on the Use of Computational Methods in the Study of Endangered Languages, pp. 170–181
@inproceedings{chirkova2025comparing,
  author    = {Chirkova, Katia and Coto-Solano, Rolando and Griffiths, Rachael and Meelen, Marieke},
  title     = {Comparing efficacy of IPA vs Pinyin romanisation transcriptions for complex tonal languages: A case study in Baima},
  booktitle = {Eight Workshop on the Use of Computational Methods in the Study of Endangered Languages},
  pages     = {170--181},
  year      = {2025}
}
Syntactic reconstruction in Celtic
Marieke Meelen
In Foundational approaches to Celtic linguistics, pp. 417–467
@incollection{meelen2025syntacticreconstruction,
  author    = {Meelen, Marieke},
  title     = {Syntactic reconstruction in Celtic},
  editor    = {Carnie, Andrew and Ohala, Diane and Hunter, Dee and Prins, Samantha and Hammond, Michael and Irizarry, Luis},
  booktitle = {Foundational approaches to Celtic linguistics},
  publisher = {Language Science Press},
  pages     = {417--467},
  year      = {2025},
  doi       = {10.5281/zenodo.15654879}
}
Collaborative Workflows for Handwritten Text Recognition in Under-Resourced Manuscript Collections
@article{meelengriffiths2025b,
  author    = {Meelen, Marieke and Griffiths, Rachael M},
  title     = {Collaborative Workflows for Handwritten Text Recognition in Under-Resourced Manuscript Collections},
  journal   = {Journal of Open Humanities Data},
  volume    = {11},
  pages     = {1--54},
  year      = {2025},
  publisher = {Ubiquity Press},
  doi       = {10.5334/johd.388}
}
How ‘Pagan’ is my text? Information Extraction from untranscribed data
Rachael M. Griffiths and Marieke Meelen
In Proceedings of the Computational Humanities Research conference, pp. 1262–1273
@inproceedings{griffithsmeelen2025b,
  author    = {Griffiths, Rachael M. and Meelen, Marieke},
  title     = {{How `Pagan' is my text? Information Extraction from untranscribed data}},
  editor    = {Arnold, Taylor and Fantoli, Margherita and Ros, Ruben},
  booktitle = {{Proceedings of the Computational Humanities Research conference}},
  pages     = {1262--1273},
  year      = {2025},
  doi       = {10.63744/aYiz0uLyIS4f}
}
2024
End-to-end speech recognition for endangered languages of Nepal
Marieke Meelen, Alexander O’Neill, and Rolando Coto-Solano
In Proceedings of the Seventh Workshop on the Use of Computational Methods in the Study of Endangered Languages, pp. 83–93
@inproceedings{meelen2024end,
  author    = {Meelen, Marieke and O'Neill, Alexander and Coto-Solano, Rolando},
  title     = {End-to-end speech recognition for endangered languages of Nepal},
  booktitle = {Proceedings of the Seventh Workshop on the Use of Computational Methods in the Study of Endangered Languages},
  pages     = {83--93},
  year      = {2024}
}
Breakthroughs in Tibetan NLP & Digital Humanities
Marieke Meelen, Sebastian Nehrdich, and Kurt Keutzer
@article{meelen2024breakthroughs,
  author    = {Meelen, Marieke and Nehrdich, Sebastian and Keutzer, Kurt},
  title     = {Breakthroughs in Tibetan NLP \& Digital Humanities},
  journal   = {Revue d'Etudes Tib\'etaines},
  volume    = {72},
  pages     = {5--25},
  year      = {2024},
  publisher = {Centre National de la Recherche Scientifique (CNRS), Paris}
}
The Diachronic Annotated Corpus of Newar: From manuscript to morphosyntax
Alexander James O’Neill and Marieke Meelen
Cahiers de Linguistique Asie Orientale, 54, pp. 162–191
@article{o2024diachronic,
  author    = {O'Neill, Alexander James and Meelen, Marieke},
  title     = {The Diachronic Annotated Corpus of Newar: From manuscript to morphosyntax},
  journal   = {Cahiers de Linguistique Asie Orientale},
  volume    = {54},
  number    = {2},
  pages     = {162--191},
  year      = {2024},
  publisher = {Brill},
  doi       = {10.1163/19606028-bja10047}
}
The diachrony of Welsh subject pronouns
Marieke Meelen and David Willis
Studia Celtica Posnaniensia Special Issue: Noun phrase and pronominal syntax in medieval and early modern Celtic languages, 9, pp. 84–111
@article{meelen2025diachrony,
  author    = {Meelen, Marieke and Willis, David},
  title     = {The diachrony of Welsh subject pronouns},
  journal   = {Studia Celtica Posnaniensia Special Issue: Noun phrase and pronominal syntax in medieval and early modern Celtic languages},
  volume    = {9},
  pages     = {84--111},
  year      = {2024},
  publisher = {Sciendo},
  doi       = {10.14746/scp.2024.9.3}
}
2022
Towards a historical treebank of Middle and Modern Welsh: Syntactic parsing
@article{meelen2022towards,
  author    = {Meelen, Marieke and Willis, David},
  title     = {Towards a historical treebank of Middle and Modern Welsh: Syntactic parsing},
  journal   = {Journal of Historical Syntax},
  volume    = {6},
  number    = {4-11},
  year      = {2022},
  publisher = {Universit{\"a}t Konstanz},
  doi       = {10.18148/hs/2022.v6i4-11.135}
}
Crosslinguistic semantic textual similarity of Buddhist Chinese and Classical Tibetan
@article{felbur2022crosslinguistic,
  author    = {Felbur, Rafal and Meelen, Marieke and Vierthaler, Paul},
  title     = {Crosslinguistic semantic textual similarity of Buddhist Chinese and Classical Tibetan},
  journal   = {Journal of Open Humanities Data},
  volume    = {8},
  pages     = {1--23},
  year      = {2022},
  publisher = {Ubiquity Press},
  doi       = {10.5334/johd.86}
}
NLP pipeline for annotating (endangered) Tibetan and Newar varieties
Christian Faggionato, Nathan Hill, and Marieke Meelen
In Proceedings of the Workshop on Resources and Technologies for Indigenous, Endangered and Lesser-resourced Languages in Eurasia within the 13th Language Resources and Evaluation Conference, pp. 1–6
@inproceedings{faggionato2022nlp,
  author    = {Faggionato, Christian and Hill, Nathan and Meelen, Marieke},
  title     = {NLP pipeline for annotating (endangered) Tibetan and Newar varieties},
  editor    = {Ojha, Atul Kr. and Ahmadi, Sina and Liu, Chao-Hong and McCrae, John P.},
  booktitle = {Proceedings of the Workshop on Resources and Technologies for Indigenous, Endangered and Lesser-resourced Languages in Eurasia within the 13th Language Resources and Evaluation Conference},
  pages     = {1--6},
  year      = {2022}
}
Towards coreference resolution for Early Irish
Mark Darling, Marieke Meelen, and David Willis
In Proceedings of the 4th Celtic Language Technology Workshop within LREC2022, pp. 85–93
@inproceedings{darling2022towards,
  author       = {Darling, Mark and Meelen, Marieke and Willis, David},
  title        = {Towards coreference resolution for Early Irish},
  editor       = {Fransen, Theodorus and Lamb, William and Prys, Delyth},
  booktitle    = {Proceedings of the 4th Celtic Language Technology Workshop within LREC2022},
  pages        = {85--93},
  year         = {2022},
  organization = {Association for Computational Linguistics}
}
Creating annotated corpora for historical languages
@article{meelen2022creating,
  author  = {Meelen, Marieke and Willis, David},
  title   = {Creating annotated corpora for historical languages},
  journal = {Journal of Historical Syntax},
  volume  = {6},
  number  = {4-11},
  pages   = {1--5},
  year    = {2022},
  doi     = {10.18148/hs/2022.v6i4-11.164}
}
@article{meelen2023cognates,
  author    = {Meelen, Marieke and Hill, Nathan W and Fellner, Hannes},
  title     = {What are cognates?},
  journal   = {Papers in Historical Phonology},
  volume    = {7},
  pages     = {44--80},
  year      = {2022},
  publisher = {Edinburgh University Library},
  doi       = {10.2218/pihph.7.2022.7405}
}
2021
Optimisation of the largest annotated Tibetan corpus combining rule-based, memory-based, and deep-learning methods
Marieke Meelen, Élie Roux, and Nathan Hill
ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP), 20, pp. 1–11
@article{meelen2021optimisation,
  author    = {Meelen, Marieke and Roux, {\'E}lie and Hill, Nathan},
  title     = {Optimisation of the largest annotated Tibetan corpus combining rule-based, memory-based, and deep-learning methods},
  journal   = {ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)},
  volume    = {20},
  number    = {1},
  pages     = {1--11},
  year      = {2021},
  publisher = {ACM New York, NY, USA}
}
Towards a historical treebank of Middle and Early Modern Welsh, part I: Workflow and POS tagging
@comment{NOTE(review): meelen2021towards previously carried doi 10.18148/hs/2022.v6i4-11.135, which is the Journal of Historical Syntax DOI already used by meelen2022towards and resolves to a different paper. It has been removed; add the Journal of Celtic Linguistics DOI once confirmed.}
@article{meelen2021towards,
  author    = {Meelen, Marieke and Willis, David},
  title     = {Towards a historical treebank of Middle and Early Modern Welsh, part I: Workflow and POS tagging},
  journal   = {Journal of Celtic Linguistics},
  volume    = {22},
  number    = {1},
  pages     = {125--154},
  year      = {2021},
  publisher = {University of Wales Press}
}
Old Catalan Morphosyntax: Developing an Annotated Corpus
@incollection{meelen2020,
  author    = {Meelen, Marieke},
  title     = {{Annotating Middle Welsh: POS tagging and chunk-parsing a partial corpus of native prose}},
  editor    = {Lash, Elliott and Qiu, Fangzhe and Stifter, David},
  booktitle = {Corpus-based approaches to morphosyntactic variation and change in medieval Celtic languages},
  publisher = {De Gruyter},
  address   = {Berlin},
  pages     = {27--47},
  year      = {2020}
}
Adjectival agreement in Middle and Early Modern Welsh native and translated prose
@article{meelen2020adjectival,
  author    = {Meelen, Marieke and Nurmio, Silva},
  title     = {Adjectival agreement in Middle and Early Modern Welsh native and translated prose},
  journal   = {Journal of Celtic Linguistics},
  volume    = {21},
  number    = {1},
  pages     = {1--28},
  year      = {2020},
  publisher = {University of Wales Press},
  doi       = {10.16922/jcl.21.2}
}
@incollection{cheng2020v3,
  author    = {Cheng, Lisa L and Meelen, Marieke and Mourigh, Khalid},
  title     = {V3 in Dutch urban varieties},
  editor    = {B{\'a}r{\'a}ny, A and Biberauer, T and Douglas, J and Vikner, S},
  booktitle = {Syntactic Architecture and its Consequences},
  series    = {Open Generative Syntax},
  publisher = {Language Science Press},
  pages     = {327--355},
  year      = {2020},
  doi       = {10.5281/zenodo.4280657}
}
Meta-dating the parsed corpus of Tibetan (PACTib)
Marieke Meelen and Élie Roux
In Proceedings of the 19th International Workshop on Treebanks and Linguistic Theories, pp. 31–42
@inproceedings{meelen2020meta,
  author    = {Meelen, Marieke and Roux, {\'E}lie},
  title     = {Meta-dating the parsed corpus of Tibetan (PACTib)},
  booktitle = {Proceedings of the 19th International Workshop on Treebanks and Linguistic Theories},
  pages     = {31--42},
  year      = {2020},
  doi       = {10.18653/v1/2020.tlt-1.3}
}
(Not so) Great Expectations: Listening to foreign-accented speech reduces the brain’s anticipatory processes
Niels O Schiller, Bastien P-A Boutonnet, Marianne LS De Heer Kloots, Marieke Meelen, and 2 more authors
@article{schiller2020not,
  author    = {Schiller, Niels O and Boutonnet, Bastien P-A and De Heer Kloots, Marianne LS and Meelen, Marieke and Ruijgrok, Bobby and Cheng, Lisa L-S},
  title     = {(Not so) Great Expectations: Listening to foreign-accented speech reduces the brain's anticipatory processes},
  journal   = {Frontiers in Psychology},
  volume    = {11},
  pages     = {2143},
  year      = {2020},
  publisher = {Frontiers Media SA},
  doi       = {10.3389/fpsyg.2020.02143}
}
@incollection{meelen2020reconstructing,
  author    = {Meelen, Marieke},
  title     = {Reconstructing the rise of Verb Second in Welsh},
  editor    = {Woods, Rebecca and Wolfe, Sam},
  booktitle = {Rethinking Verb Second},
  publisher = {Oxford University Press},
  pages     = {426--454},
  year      = {2020}
}
2019
Developing the Old Tibetan treebank
Christian Faggionato and Marieke Meelen
In Proceedings of Recent Advances in Natural Language Processing, pp. 304–312
@inproceedings{faggionato2019developing,
  author    = {Faggionato, Christian and Meelen, Marieke},
  title     = {Developing the Old Tibetan treebank},
  booktitle = {Proceedings of Recent Advances in Natural Language Processing},
  publisher = {Incoma Ltd., Shoumen, Bulgaria},
  pages     = {304--312},
  year      = {2019},
  doi       = {10.26615/978-954-452-056-4_035}
}
2017
Object-initial word order in Middle Welsh narrative prose
Marieke Meelen
In Referential Properties and Their Impact on the Syntax of Insular Celtic Languages. Studien und Texte zur Keltologie 14, pp. 145–178
@incollection{meelen2017,
  author    = {Meelen, Marieke},
  title     = {{Object-initial word order in Middle Welsh narrative prose}},
  editor    = {Widmer and Poppe, Erich},
  booktitle = {Referential Properties and Their Impact on the Syntax of Insular Celtic Languages. Studien und Texte zur Keltologie 14},
  publisher = {Nodus Publikationen},
  address   = {M{\"u}nster},
  pages     = {145--178},
  year      = {2017}
}
Segmenting and POS tagging Classical Tibetan using a memory-based tagger
@article{meelenhill2017segmenting,
  author  = {Meelen, Marieke and Hill, Nathan},
  title   = {Segmenting and POS tagging Classical Tibetan using a memory-based tagger},
  journal = {Himalayan Linguistics},
  volume  = {16},
  number  = {2},
  pages   = {64--89},
  year    = {2017},
  doi     = {10.5070/H916234501}
}
2015
Promoting youth development worldwide: The Duke of Edinburgh’s international award
Eva van Baren, Marieke Meelen, and Lucas CPM Meijs
@article{van2015promoting,
  author  = {van Baren, Eva and Meelen, Marieke and Meijs, Lucas CPM},
  title   = {Promoting youth development worldwide: The Duke of Edinburgh's international award},
  journal = {Journal of Youth Development},
  volume  = {10},
  number  = {1},
  pages   = {1--14},
  year    = {2015}
}
Annotated Corpora & Other Datasets
2025
Ground Truth for PaganTibet Ume models 1 and 2
Rachael M. Griffiths, Marieke Meelen, Daniel Berounský, Marc des Jardins, and 17 more authors
@dataset{griffithsetal2025_ume1+2GT,
  author    = {Griffiths, Rachael M. and Meelen, Marieke and Berounsk{\'y}, Daniel
               and des Jardins, Marc and Gurung, Kalsang Norbu and Mulraney, Stephen
               and Punzi, Valentina and Ramble, Charles and Szab{\'o}ov{\'a}, Linda
               and Tsering, Naljor and Tso, Kundru and Chokgyal, Sherab
               and Gyaltsen, Tsultrim and Drukgyal, Tsewang and Gyatso, Tsognyi
               and Lhundup, Tritsuk and Palsang, Tsultrim and Rabsal, Tsukphud
               and Wangchuk, Palgyi and Woeser, Tsugphud and Woser, Sherap},
  title     = {{Ground Truth for PaganTibet Ume models 1 and 2}},
  month     = oct,
  year      = {2025},
  publisher = {Zenodo},
  doi       = {10.5281/zenodo.17275724},
}
HTR Input and Correction Cheat Sheet: 10 Basic Rules and Protocols for Diplomatic Transcription
@misc{griffithsmeelen2025a,
  author    = {Griffiths, Rachael M. and Meelen, Marieke},
  title     = {{HTR Input and Correction Cheat Sheet: 10 Basic Rules and Protocols for Diplomatic Transcription}},
  year      = {2025},
  publisher = {Zenodo},
  version   = {3.1},
  doi       = {10.5281/zenodo.17251318},
}
@misc{meelengriffiths2025,
  author    = {Meelen, Marieke and Griffiths, Rachael M.},
  title     = {{HTR Input and Correction Manual}},
  year      = {2025},
  publisher = {Zenodo},
  doi       = {10.5281/zenodo.17257009},
}
2024
Classical Newar Annotation Manual: Part I - Preprocessing & Segmentation
@misc{oneillmeelen2024,
  author    = {O'Neill, Alexander and Meelen, Marieke},
  title     = {{Classical Newar Annotation Manual: Part I - Preprocessing \& Segmentation}},
  year      = {2024},
  publisher = {Zenodo},
}
Classical Newar Annotation Manual: Part II - Part-of-Speech Tagging
@misc{oneillmeelen2025,
  author    = {O'Neill, Alexander and Meelen, Marieke},
  title     = {{Classical Newar Annotation Manual: Part II - Part-of-Speech Tagging}},
  year      = {2024},
  publisher = {Zenodo},
  doi       = {10.5281/zenodo.13117962}
}
@dataset{oneillmeelen2026,
  author    = {O'Neill, Alexander and Meelen, Marieke},
  title     = {{Diachronic Annotated Corpus of Newar (DACON)}},
  year      = {2024},
  publisher = {Zenodo},
  doi       = {10.5281/zenodo.12887386}
}
2023
Classical Tibetan Annotation Manual Part II - Segmentation & POS tagging
Marieke Meelen, Christian Faggionato, and Nathan Hill
@misc{meelenfaggionatohill2023,
  author    = {Meelen, Marieke and Faggionato, Christian and Hill, Nathan},
  title     = {Classical Tibetan Annotation Manual Part II - Segmentation \& POS tagging},
  year      = {2023},
  publisher = {Zenodo},
}
2022
An audio-visual archive of Dzardzongke (South Mustang Tibetan)
@dataset{meelenramble2022,
  author    = {Meelen, Marieke},
  title     = {An audio-visual archive of Dzardzongke (South Mustang Tibetan)},
  year      = {2022},
  publisher = {ELAR},
}
@dataset{meelenpujol2022,
  author    = {Meelen, Marieke and Pujol i Campeny, Afra},
  title     = {The first annotated corpus of Old Catalan},
  year      = {2022},
  publisher = {Zenodo},
}
@dataset{meelenroux2020,
  author    = {Meelen, Marieke and Roux, {\'E}lie},
  title     = {The Annotated Corpus of Classical Tibetan (ACTib) - Version 2.0 (Segmented \& POS-tagged)},
  year      = {2020},
  publisher = {Zenodo},
}
The Annotated Corpus of Classical Tibetan (ACTib) - Version 2.0 (Segmented & POS-tagged)
@dataset{meelenroux2021,
  author    = {Meelen, Marieke and Roux, {\'E}lie},
  title     = {The Annotated Corpus of Classical Tibetan (ACTib) - Version 2.0 (Segmented \& POS-tagged)},
  year      = {2020},
  publisher = {Zenodo},
}
@dataset{meelensackmannparina2018,
  author = {Meelen, Marieke and Sackmann, Raphael and Parina, Elena},
  title  = {PARSHCWL -- The annotated texts of the Llyfr yr Ancr},
  year   = {2018},
}
An audio-visual archive and searchable corpus of Kaike, an endangered Tibeto-Burman language of Dolpa, Nepal
@dataset{meelen2018,
  author    = {Meelen, Marieke},
  title     = {An audio-visual archive and searchable corpus of Kaike, an endangered Tibeto-Burman language of Dolpa, Nepal},
  year      = {2018},
  publisher = {ELAR},
}