% Bibliography database: publications (co-)authored by Andreas van Cranenburgh,
% listed in roughly chronological order.
%
% Conventions used in this file:
%   - one field per line, values delimited by braces;
%   - personal names in "von Last, First" form;
%   - accented letters written as braced LaTeX escapes so sorting works;
%   - proper nouns and acronyms in titles protected with braces (whole words);
%   - page ranges written with a double hyphen.

@inproceedings{vancranenburgh2010invented,
  author = {van Cranenburgh, Andreas and Sassoon, Galit and Fern{\'a}ndez, Raquel},
  title = {Invented antonyms: {Esperanto} as a semantic lab},
  booktitle = {Proceedings of the 26th Annual Meeting of the Israel Association for Theoretical Linguistics (IATL 26)},
  year = {2010},
  url = {http://dare.uva.nl/en/record/371912},
}

@inproceedings{vancranenburgh2011dop,
  author = {van Cranenburgh, Andreas and Scha, Remko and Sangati, Federico},
  title = {Discontinuous Data-Oriented Parsing: A mildly context-sensitive all-fragments grammar},
  booktitle = {Proceedings of SPMRL},
  pages = {34--44},
  year = {2011},
  url = {http://aclweb.org/anthology/W11-3805},
}

@inproceedings{vancranenburgh2012efficient,
  author = {van Cranenburgh, Andreas},
  title = {Efficient parsing with linear context-free rewriting systems},
  booktitle = {Proceedings of EACL},
  pages = {460--470},
  year = {2012},
  note = {Corrected version: \url{http://andreasvc.github.io/eacl2012corrected.pdf}},
}

@inproceedings{vancranenburgh2012literary,
  author = {van Cranenburgh, Andreas},
  title = {Literary authorship attribution with phrase-structure fragments},
  booktitle = {Proceedings of CLFL},
  pages = {59--63},
  year = {2012},
  note = {Revised version: \url{http://andreasvc.github.io/clfl2012.pdf}},
}

@inproceedings{aloni2012indefinites,
  author = {Aloni, Maria and van Cranenburgh, Andreas and Fern{\'a}ndez, Raquel and Sznajder, Marta},
  title = {Building a Corpus of Indefinite Uses Annotated with Fine-grained Semantic Functions},
  booktitle = {The eighth international conference on Language Resources and Evaluation (LREC)},
  year = {2012},
  url = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/362_Paper.pdf},
}

@techreport{vancranenburgh2012extracting,
  author = {van Cranenburgh, Andreas},
  title = {Extracting tree fragments in linear average time},
  institution = {Institute for Logic, Language and Computation (ILLC)},
  school = {University of Amsterdam},
  number = {PP-2012-18},
  year = {2012},
  url = {http://dare.uva.nl/en/record/421534},
}

@inproceedings{vancranenburgh2013disc,
  author = {van Cranenburgh, Andreas and Bod, Rens},
  title = {Discontinuous parsing with an efficient and accurate {DOP} model},
  booktitle = {Proceedings of IWPT},
  pages = {7--16},
  year = {2013},
  url = {http://aclweb.org/anthology/W13-5701},
}

@inproceedings{jautze2013high,
  author = {Jautze, Kim and Koolen, Corina and van Cranenburgh, Andreas and de Jong, Hayco},
  title = {From high heels to weed attics: a syntactic investigation of chick lit and literature},
  booktitle = {Proc.~of workshop Computational Linguistics for Literature},
  pages = {72--81},
  year = {2013},
  url = {http://aclweb.org/anthology/W13-1410},
}

@article{vancranenburgh2014linear,
  author = {van Cranenburgh, Andreas},
  title = {Extraction of phrase-structure fragments with a linear average time tree kernel},
  journal = {Computational Linguistics in the Netherlands Journal},
  volume = {4},
  pages = {3--16},
  year = {2014},
  issn = {2211-4009},
  url = {https://clinjournal.org/clinj/article/view/36},
}

@article{roorda2014hebrew,
  author = {Roorda, Dirk and Kalkman, Gino and Naaijer, Martijn and van Cranenburgh, Andreas},
  title = {{LAF}-Fabric: a data analysis tool for linguistic annotation framework with an application to the {Hebrew Bible}},
  journal = {Computational Linguistics in the Netherlands Journal},
  volume = {4},
  pages = {105--120},
  year = {2014},
  issn = {2211-4009},
  url = {https://clinjournal.org/clinj/article/view/44},
}

@inproceedings{vancranenburgh2015identifying,
  author = {van Cranenburgh, Andreas and Koolen, Corina},
  title = {Identifying literary texts with bigrams},
  booktitle = {Proceedings of CLFL},
  pages = {58--67},
  year = {2015},
  url = {http://aclweb.org/anthology/W15-0707},
}

@inproceedings{sangati2015mwe,
  author = {Sangati, Federico and van Cranenburgh, Andreas},
  title = {Multiword expression identification with recurring tree fragments and association measures},
  booktitle = {Proceedings of the 11th Workshop on Multiword Expressions},
  pages = {10--18},
  year = {2015},
  url = {http://aclweb.org/anthology/W15-0902},
}

@article{vancranenburgh2016machine,
  author = {van Cranenburgh, Andreas},
  title = {Machine Learning Literature using Textual Features},
  journal = {Tiny Transactions on Computer Science},
  volume = {4},
  year = {2016},
  url = {http://tinytocs.ece.utexas.edu/papers/tinytocs4_paper_cranenburgh.pdf},
}

@inproceedings{jautze2016topic,
  author = {Jautze, Kim and van Cranenburgh, Andreas and Koolen, Corina},
  title = {Topic Modeling Literary Quality},
  booktitle = {Digital Humanities 2016: Conference Abstracts},
  pages = {233--237},
  year = {2016},
  address = {Krak{\'o}w, Poland},
  url = {http://dh2016.adho.org/abstracts/95},
}

@article{vancranenburgh2016disc,
  author = {van Cranenburgh, Andreas and Scha, Remko and Bod, Rens},
  title = {Data-Oriented Parsing with discontinuous constituents and function tags},
  journal = {Journal of Language Modelling},
  volume = {4},
  number = {1},
  pages = {57--111},
  year = {2016},
  url = {http://dx.doi.org/10.15398/jlm.v4i1.100},
}

@phdthesis{vancranenburgh2016rich,
  author = {van Cranenburgh, Andreas},
  title = {Rich Statistical Parsing and Literary Language},
  school = {University of Amsterdam},
  year = {2016},
  note = {Revised version: \url{http://andreasvc.github.io/phdthesis_v1.1.pdf}},
}

@inproceedings{vancranenburgh2017literary,
  author = {van Cranenburgh, Andreas and Bod, Rens},
  title = {A Data-Oriented Model of Literary Language},
  booktitle = {Proceedings of EACL},
  pages = {1228--1238},
  year = {2017},
  url = {http://aclweb.org/anthology/E17-1115},
}

@inproceedings{koolen2017stereotypes,
  author = {Koolen, Corina and van Cranenburgh, Andreas},
  title = {These are not the Stereotypes You are Looking For: Bias and Fairness in Authorial Gender Attribution},
  booktitle = {Proceedings of the First Ethics in NLP workshop},
  pages = {12--22},
  year = {2017},
  url = {http://aclweb.org/anthology/W17-1602},
}

@article{koolen2018blue,
  author = {Koolen, Corina and van Cranenburgh, Andreas},
  title = {Blue eyes and porcelain cheeks: Computational extraction of physical descriptions from {Dutch} chick lit and literary novels},
  journal = {Digital Scholarship in the Humanities},
  volume = {33},
  number = {1},
  pages = {59--71},
  year = {2018},
  url = {https://academic.oup.com/dsh/article/3091837},
}

@inproceedings{bladier2018supertagging,
  author = {Bladier, Tatiana and van Cranenburgh, Andreas and Samih, Younes and Kallmeyer, Laura},
  title = {{German} and {French} Neural Supertagging Experiments for {LTAG} Parsing},
  booktitle = {Proceedings of ACL 2018, Student Research Workshop},
  pages = {59--66},
  year = {2018},
  url = {http://aclweb.org/anthology/P18-3009},
}

@inproceedings{vancranenburgh2018cliche,
  author = {van Cranenburgh, Andreas},
  title = {Cliche Expressions in Literary and Genre Novels},
  booktitle = {Proceedings of LaTeCH-CLfL workshop},
  pages = {34--43},
  year = {2018},
  url = {http://www.aclweb.org/anthology/W18-4504},
}

@inproceedings{vancranenburgh2018active,
  author = {van Cranenburgh, Andreas},
  title = {Active {DOP}: A constituency treebank annotation tool with online learning},
  booktitle = {Proceedings of COLING system demonstrations},
  pages = {38--42},
  year = {2018},
  url = {http://www.aclweb.org/anthology/C18-2009},
}

@inproceedings{bladier2018rrg,
  author = {Bladier, Tatiana and van Cranenburgh, Andreas and Evang, Kilian and Kallmeyer, Laura and M{\"o}llemann, Robin and Osswald, Rainer},
  title = {{RRGbank}: a {Role and Reference Grammar} Corpus of Syntactic Structures Extracted from the {Penn Treebank}},
  booktitle = {Proceedings of Treebanks and Linguistic Theories},
  pages = {5--16},
  year = {2018},
  url = {http://www.ep.liu.se/ecp/155/003/ecp18155003.pdf},
}

@article{vancranenburgh2019vecspace,
  author = {van Cranenburgh, Andreas and van Dalen-Oskam, Karina and van Zundert, Joris},
  title = {Vector space explorations of literary language},
  journal = {Language Resources and Evaluation},
  volume = {53},
  number = {4},
  pages = {625--650},
  year = {2019},
  issn = {1574-0218},
  doi = {10.1007/s10579-018-09442-4},
  url = {https://doi.org/10.1007/s10579-018-09442-4},
  abstract = {Literary novels are said to distinguish themselves from other novels through conventions associated with literariness. We investigate the task of predicting the literariness of novels as perceived by readers, based on a large reader survey of contemporary Dutch novels. Previous research showed that ratings of literariness are predictable from texts to a substantial extent using machine learning, suggesting that it may be possible to explain the consensus among readers on which novels are literary as a consensus on the kind of writing style that characterizes literature. Although we have not yet collected human judgments to establish the influence of writing style directly (we use a survey with judgments based on the titles of novels), we can try to analyze the behavior of machine learning models on particular text fragments as a proxy for human judgments. In order to explore aspects of the texts associated with literariness, we divide the texts of the novels in chunks of 2--3 pages and create vector space representations using topic models (Latent Dirichlet Allocation) and neural document embeddings (Distributed Bag-of-Words Paragraph Vectors). We analyze the semantic complexity of the novels using distance measures, supporting the notion that literariness can be partly explained as a deviation from the norm. Furthermore, we build predictive models and identify specific keywords and stylistic markers related to literariness. While genre plays a role, we find that the greater part of factors affecting judgments of literariness are explicable in bag-of-words terms, even in short text fragments and among novels with higher literary ratings. The code and notebook used to produce the results in this paper are available at https://github.com/andreasvc/litvecspace.},
}

@inproceedings{vancranenburgh2019literary,
  author = {van Cranenburgh, Andreas and Koolen, Corina},
  title = {The Literary {Pepsi} Challenge: intrinsic and extrinsic factors in judging literary quality},
  booktitle = {Digital Humanities 2019: Conference Abstracts},
  year = {2019},
  address = {Utrecht, The Netherlands},
  url = {http://andreasvc.github.io/dh2019.pdf},
}

@article{vancranenburgh2019coref,
  author = {van Cranenburgh, Andreas},
  title = {A {Dutch} coreference resolution system with an evaluation on literary fiction},
  journal = {Computational Linguistics in the Netherlands Journal},
  volume = {9},
  pages = {27--54},
  year = {2019},
  url = {https://clinjournal.org/clinj/article/view/91},
}

@article{koolen2020survey,
  author = {Koolen, Corina and van Dalen-Oskam, Karina and van Cranenburgh, Andreas and Nagelhout, Erica},
  title = {Literary quality in the eye of the {Dutch} reader: The {National Reader Survey}},
  journal = {Poetics},
  volume = {79},
  year = {2020},
  issn = {0304-422X},
  url = {https://doi.org/10.1016/j.poetic.2020.101439},
  keywords = {Readers, Literary quality, Popular fiction, Bestsellers, Survey},
}

@inproceedings{tulkens2020embarrassingly,
  author = {Tulkens, St{\'e}phan and van Cranenburgh, Andreas},
  title = {Embarrassingly Simple Unsupervised Aspect Extraction},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  pages = {3182--3187},
  year = {2020},
  doi = {10.18653/v1/2020.acl-main.290},
  url = {https://www.aclweb.org/anthology/2020.acl-main.290},
}

@misc{devries2020bertje,
  author = {de Vries, Wietse and van Cranenburgh, Andreas and Bisazza, Arianna and Caselli, Tommaso and van Noord, Gertjan and Nissim, Malvina},
  title = {{BERTje}: {A} {Dutch} {BERT} {Model}},
  howpublished = {arXiv:1912.09582},
  year = {2019},
  month = dec,
  url = {http://arxiv.org/abs/1912.09582},
}

@inproceedings{devries2020bertlayers,
  author = {de Vries, Wietse and van Cranenburgh, Andreas and Nissim, Malvina},
  title = {What's so special about {BERT}'s layers? {A} closer look at the {NLP} pipeline in monolingual and multilingual models},
  booktitle = {Findings of EMNLP},
  pages = {4339--4350},
  year = {2020},
  url = {https://www.aclweb.org/anthology/2020.findings-emnlp.389},
}

@inproceedings{vancranenburgh2020blind,
  author = {van Cranenburgh, Andreas and Koolen, Corina},
  title = {Results of a Single Blind Literary Taste Test with Short Anonymized Novel Fragments},
  booktitle = {Proceedings of LaTeCH-CLfL},
  pages = {121--126},
  year = {2020},
  url = {https://www.aclweb.org/anthology/2020.latechclfl-1.14},
}

@inproceedings{poot2020benchmark,
  author = {Poot, Corb{\`e}n and van Cranenburgh, Andreas},
  title = {A Benchmark of Rule-Based and Neural Coreference Resolution in {Dutch} Novels and News},
  booktitle = {Proceedings of CRAC},
  pages = {79--90},
  year = {2020},
  url = {https://www.aclweb.org/anthology/2020.crac-1.9},
}

@article{luoto2021dataset,
  author = {Luoto, Severi and van Cranenburgh, Andreas},
  title = {Psycholinguistic dataset on language use in 1145 novels published in {English} and {Dutch}},
  journal = {Data in Brief},
  volume = {34},
  pages = {106655},
  year = {2021},
  issn = {2352-3409},
  url = {https://doi.org/10.1016/j.dib.2020.106655},
  keywords = {Stylometry, Literature, LIWC, Psycholinguistics, Corpus linguistics, Digital humanities, Sex, Sexual orientation},
}

@inproceedings{vancranenburgh2021stylometric,
  author = {van Cranenburgh, Andreas and Ketzan, Erik},
  title = {Stylometric Literariness Classification: the Case of {Stephen King}},
  booktitle = {Proceedings of LaTeCH-CLfL},
  pages = {189--197},
  year = {2021},
  url = {https://aclanthology.org/2021.latechclfl-1.21},
}

@inproceedings{vancranenburgh2021hybrid,
  author = {van Cranenburgh, Andreas and Ploeger, Esther and van den Berg, Frank and Th{\"u}ss, Remi},
  title = {A Hybrid Rule-Based and Neural Coreference Resolution System with an Evaluation on {Dutch} Literature},
  booktitle = {Proceedings of CRAC},
  pages = {47--56},
  year = {2021},
  url = {https://aclanthology.org/2021.crac-1.5},
}

@article{vancranenburgh2022openboek,
  author = {van Cranenburgh, Andreas and van Noord, Gertjan},
  title = {{OpenBoek}: A Corpus of Literary Coreference and Entities with an Exploration of Historical Spelling Normalization},
  journal = {Computational Linguistics in the Netherlands Journal},
  volume = {12},
  pages = {235--251},
  year = {2022},
  month = dec,
  url = {https://clinjournal.org/clinj/article/view/157},
  abstract = {We present OpenBoek: a corpus of 103k tokens of classic Dutch novels with annotated coreference and entities. The corpus has several properties that are challenging for current coreference models: long documents (fragments of 10k+ words each), domain-specific literary phenomena, and 19th century Dutch spelling. Spelling normalization is added to the corpus as an additional annotation layer, using a data-driven rule-based spelling normalization tool. Normalizations are added using meta-annotation, such that evaluation can be performed with annotations on the original texts without losing token alignment. This tool enables the application of parsing and coreference systems originally developed for modern Dutch. We evaluate parsing and coreference systems on the OpenBoek dataset and find that spelling normalization gives a substantial increase in performance. The OpenBoek corpus is available under an open license at https://andreasvc.github.io/openboek/},
}

@inproceedings{vancranenburgh2023qa,
  author = {van Cranenburgh, Andreas and van den Berg, Frank},
  title = {Direct Speech Quote Attribution for {Dutch} Literature},
  booktitle = {Proceedings of LaTeCH-CLfL},
  pages = {45--62},
  year = {2023},
  url = {https://aclanthology.org/2023.latechclfl-1.6},
  abstract = {We present a dataset and system for quote attribution in Dutch literature. The system is implemented as a neural module in an existing NLP pipeline for Dutch literature (dutchcoref; van Cranenburgh, 2019). Our contributions are as follows. First, we provide guidelines for Dutch quote attribution and annotate 3,056 quotes in fragments of 42 Dutch literary novels, both contemporary and classic. Second, we present three neural quote attribution classifiers, optimizing for precision, recall, and F1. Third, we perform an evaluation and analysis of quote attribution performance, showing that in particular, quotes with an implicit speaker are challenging, and that such quotes are prevalent in contemporary fiction (57{\%}, compared to 32{\%} for classic novels). On the task of quote attribution, we achieve an improvement over the rule-based baseline of 8.0{\%} F1 points on contemporary fiction and 1.9{\%} F1 points on classic novels. Code, data, and models are available at https://github.com/frenkvdberg/dutchqa},
}

@article{visser2023distant,
  author = {Visser Solissa, Noa and van Cranenburgh, Andreas},
  title = {A Distant Reading of Gender Bias in {Dutch} Literary Prizes},
  journal = {Digital Humanities Benelux journal},
  volume = {5},
  year = {2023},
  url = {https://journal.dhbenelux.org/wp-content/uploads/2023/09/DH_Benelux_Journal_Volume_5_3_Visser.pdf},
}

@inproceedings{zundert2023character,
  author = {van Zundert, Joris and van Cranenburgh, Andreas and Smeets, Roel},
  title = {Putting {Dutchcoref} to the Test: Character Detection and Gender Dynamics in Contemporary {Dutch} Novels},
  booktitle = {Computational Humanities Research conference},
  pages = {757--771},
  year = {2023},
  url = {https://ceur-ws.org/Vol-3558/paper9264.pdf},
}

@incollection{toral2024literary,
  author = {Toral, Antonio and van Cranenburgh, Andreas and Nutters, Tia},
  title = {Literary-adapted machine translation in a well-resourced language pair: Explorations with More Data and Wider Contexts},
  booktitle = {Computer-Assisted Literary Translation},
  editor = {Rothwell, Andrew and Way, Andy and Youdale, Roy},
  chapter = {1},
  publisher = {Routledge},
  year = {2024},
  url = {https://www.routledge.com/Computer-Assisted-Literary-Translation/Rothwell-Way-Youdale/p/book/9781032413006},
}