Publications

Schmidt T (2003), "Korpus "Skandinavische Semikommunikation" - ein mehrsprachiges Diskurskorpus auf XML-Basis", In Sprachtechnologie für die multilinguale Kommunikation - Textproduktion, Recherche, Übersetzung, Lokalisierung. Beiträge der GLDV-Frühjahrstagung 2003 an der Hochschule Anhalt (FH) in Köthen. Sankt Augustin Vol. 18(1/2), pp. 421-427. Gardez!-Verlag.

BibTeX:

@inproceedings{Schmidt2003,
  author = {Schmidt, Thomas},
  editor = {Seewald-Heeg, Uta},
  title = {Korpus ``{Skandinavische} {Semikommunikation}'' -- ein mehrsprachiges {Diskurskorpus} auf {XML-Basis}},
  booktitle = {Sprachtechnologie für die multilinguale Kommunikation -- Textproduktion, Recherche, Übersetzung, Lokalisierung. Beiträge der GLDV-Frühjahrstagung 2003 an der Hochschule Anhalt (FH) in Köthen},
  publisher = {Gardez!-Verlag},
  year = {2003},
  volume = {18},
  number = {1/2},
  pages = {421--427},
  note = {DE},
  url = {http://media.dwds.de/jlcl/2003_Doppelheft/421-427_Schmidt.pdf}
}

Angermeyer P, Bührig K and Meyer B (2013), "Community Interpreting Database Pilot Corpus (ComInDat)", Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2013-06-10. 6, 2013.

BibTeX:

@misc{comindat,
  author = {Angermeyer, Philipp and Bührig, Kristin and Meyer, Bernd},
  title = {{Community Interpreting Database Pilot Corpus} ({ComInDat})},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2013-06-10},
  year = {2013},
  note = {audio \& video; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-51E4-3}
}

Angermeyer PS, Meyer B and Schmidt T (2012), "Sharing community interpreting corpora. A pilot study", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14, pp. 275–294. John Benjamins.

BibTeX:

@incollection{Angermeyer2012,
  author = {Angermeyer, Philipp S. and Meyer, Bernd and Schmidt, Thomas},
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {Sharing community interpreting corpora. {A} pilot study},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14},
  pages = {275--294}
}

Baumgarten N, Herkenrath A, Schmidt T, Wörner K and Zeevaert L (2007), "Studying Connectivity with the Help of Computer-Readable Corpora: Some Exemplary Analyses from Modern and Historical, Written and Spoken Corpora", In Connectivity in Grammar and Discourse. Amsterdam Vol. 5, pp. 259-289. Benjamins.

[Abstract] [BibTeX]

Abstract: This paper discusses methodological aspects of the use of electronic language corpora for the study of connectivity. We demonstrate how a corpus-based approach was used to investigate functional characteristics of coordinating elements in sentence- or utterance-initial position across different languages (English, German, Old Swedish and Turkish), across different modalities (written and spoken) and across the diachronic dimension (historic and modern languages). Our focus is on the difficulties we encountered in this study when attempting to transfer corpus-based methods developed for the analysis of corpora of modern, written language to the analysis of corpora of historic or spoken language. We suggest an abstract corpus-linguistic workflow and discuss where and how this workflow differs according to the corpus type, and how well its individual steps are supported by current corpus technology.

BibTeX:

@incollection{Baumgarten2007,
  author = {Baumgarten, Nicole and Herkenrath, Annette and Schmidt, Thomas and Wörner, Kai and Zeevaert, Ludger},
  editor = {Rehbein, Jochen and Hohenstein, Christiane and Pietsch, Lukas},
  title = {Studying Connectivity with the Help of Computer-Readable Corpora: Some Exemplary Analyses from Modern and Historical, Written and Spoken Corpora},
  booktitle = {Connectivity in Grammar and Discourse},
  publisher = {Benjamins},
  year = {2007},
  volume = {5},
  pages = {259--289},
  note = {EN}
}

Becher V (2011), "Explicitation and implicitation in translation. A corpus-based study of English-German and German-English translations of business texts". Thesis at: Universität Hamburg.

BibTeX:

@phdthesis{Becher,
  author = {Becher, Viktor},
  title = {Explicitation and implicitation in translation. {A} corpus-based study of {English-German} and {German-English} translations of business texts},
  school = {Universität Hamburg},
  year = {2011}
}

Belz M and Klapi M (2013), "Pauses following fillers in L1 and L2 German Map Task dialogues.", In Proceedings of Disfluency in Spontaneous Speech, DiSS 2013.

BibTeX:

@inproceedings{Belz2013,
  author = {Belz, Malte and Klapi, Myriam},
  title = {Pauses following fillers in {L1} and {L2} {German} {Map Task} dialogues},
  booktitle = {Proceedings of Disfluency in Spontaneous Speech, DiSS 2013},
  year = {2013},
  url = {http://www.diss2013.org/Proceedings_DiSS_2013.pdf}
}

Benet A, Cortés S and Lleó C (2012), "Phonoprosodic corpus of spoken Catalan (PhonCAT)", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14, pp. 215–229. John Benjamins.

[Abstract] [BibTeX]

Abstract: This article describes the corpus of spoken Catalan elaborated within the research project “Phonoprosodic development of Catalan in its current bilingual context”. The corpus contains 174 interviews with speakers from three districts of Barcelona varying on the presence of Spanish. The subjects belong to three age groups: children aged 3 to 5, young people aged 19 to 23 and adults aged 32 to 40. The collected data consist of semi-spontaneous speech, free conversations, a role-play, a reading task and a sociolinguistic questionnaire. The goals of the project include auditory and acoustic analyses of Catalan segments (exemplified here by some results on vowels), the study of loan words and of cognates with different gender across Catalan and Spanish, as well as prosodic analyses of intonational phrasing of declaratives and interrogatives.

BibTeX:

@incollection{Benet2012,
  author = {Benet, Ariadna and Cortés, Susana and Lleó, Conxita},
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {Phonoprosodic corpus of spoken {Catalan} ({PhonCAT})},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14},
  pages = {215--229}
}

Braunmüller K (2011), "Faroese Danish Corpus Hamburg (FADAC Hamburg)", Archived in Hamburger Zentrum für Sprachkorpora. Version 0.2. Publication date 2011-06-30. 6, 2011.

BibTeX:

@misc{fadac,
  author = {Braunmüller, Kurt},
  title = {{Faroese Danish Corpus Hamburg} ({FADAC Hamburg})},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 0.2. Publication date 2011-06-30},
  year = {2011},
  note = {audio; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-5B4D-5}
}

Brehmer B (2011), "Hamburg Corpus of Polish in Germany (HamCoPoliG)", Archived in Hamburger Zentrum für Sprachkorpora. Version 0.2. Publication date 2011-09-02. 9, 2011.

BibTeX:

@misc{hamcopolig,
  author = {Brehmer, Bernhard},
  title = {{Hamburg Corpus of Polish in Germany} ({HamCoPoliG})},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 0.2. Publication date 2011-09-02},
  year = {2011},
  note = {audio; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-63CE-9}
}

Brünner G (2002), "Kritische Stellungnahme zur Darstellung des Systems HIAT in Norbert Dittmars Buch „Transkription“", Gesprächsforschung. Vol. 3, pp. 29-32.

BibTeX:

@article{Bruenner2002a,
  author = {Brünner, Gisela},
  title = {Kritische {Stellungnahme} zur {Darstellung} des {Systems} {HIAT} in {Norbert} {Dittmars} {Buch} „{Transkription}“},
  journal = {Gesprächsforschung},
  year = {2002},
  volume = {3},
  pages = {29--32},
  note = {DE},
  url = {http://www.gespraechsforschung-ozs.de/fileadmin/dateien/heft2002/ko-bruenner.pdf}
}

Bührig K, Kliche O, Meyer B and Pawlack B (2012), "The corpus “Interpreting in Hospitals”. Possible applications for research and communication training", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14, pp. 305–315. John Benjamins.

[Abstract] [BibTeX]

Abstract: This paper explores the question how language corpora can enhance discourse analytic research as well as communication trainings. To do this, we refer to the language corpus “Interpreting in hospitals”, and begin by describing it in detail. Subsequently, the paper exemplifies how the corpus was used to analyse ad-hoc-interpreting in medical settings, focusing on the function of specific linguistic elements and speech actions (Bührig & Meyer 2004). Finally, the paper shows how research findings based on the corpus and data from the corpus can be used in communication trainings, describing a training for bilingual hospital employees. The corpus allows trainers to identify relevant training contents, and it offers the possibility to integrate sections of authentic discourse in the training. The paper illustrates how the training participants accepted and worked with the discourse data, and draws conclusions concerning the use of corpus based analyses in trainings on workplace communication.

BibTeX:

@incollection{Buehrig2012,
  author = {Bührig, Kristin and Kliche, Ortrun and Meyer, Bernd and Pawlack, Birte},
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {The corpus “{Interpreting in Hospitals}”. {Possible} applications for research and communication training},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14},
  pages = {305--315}
}

Bührig K and Meyer B (2009), "Dolmetschen im Krankenhaus (DiK)", Archived in Hamburger Zentrum für Sprachkorpora. Version 1.1. Publication date 2009-01-05. 1, 2009.

BibTeX:

@misc{dik,
  author = {Bührig, Kristin and Meyer, Bernd},
  title = {Dolmetschen im {Krankenhaus} ({DiK})},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 1.1. Publication date 2009-01-05},
  year = {2009},
  note = {audio; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-523B-2}
}

Czachór A (2012), "Corpus of Polish spoken in Germany. Collecting and analysing written & spoken data for investigating contact-induced change", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14, pp. 153–161. John Benjamins.

[Abstract] [BibTeX]

Abstract: The present paper reflects on methodological aspects of the data gathering, analysis, and reuse and will present the practical experience from designing a test battery and selecting and approaching the participants, to conducting the experiments. Our project aims to provide a descriptive survey of contact-induced change in two groups of bilingual (L1 Polish) speakers currently living in Germany. The corpus contains written and spoken, elicited and free data that complement each other with regard to investigating morphosyntactic phenomena. This paper gives a description of each part of our tests: selecting the topics, conducting the experiment, and choosing technical equipment needed for recording of the speech data; stimuli presentation and description of the software used for grammaticality judgments; construction of a gapped text, and finally the sociolinguistic questionnaire and self-evaluation tasks.

BibTeX:

@incollection{Czachor2012,
  author = {Czachór, Agnieszka},
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {Corpus of {Polish} spoken in {Germany}. {Collecting} and analysing written \& spoken data for investigating contact-induced change},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14},
  pages = {153--161}
}

Deppermann A and Schmidt T (2014), "Gesprächsdatenbanken als methodisches Instrument der Interaktionalen Linguistik - Eine exemplarische Untersuchung auf Basis des Korpus FOLK in der Datenbank für Gesprochenes Deutsch (DGD2).", Mitteilungen des Deutschen Germanistenverbandes. Vol. 1, pp. 4-17.

BibTeX:

@article{DeppermannSchmidt2014,
  author = {Deppermann, Arnulf and Schmidt, Thomas},
  title = {Gesprächsdatenbanken als methodisches {Instrument} der {Interaktionalen} {Linguistik} -- {Eine} exemplarische {Untersuchung} auf {Basis} des {Korpus} {FOLK} in der {Datenbank} für {Gesprochenes} {Deutsch} ({DGD2})},
  journal = {Mitteilungen des Deutschen Germanistenverbandes},
  year = {2014},
  volume = {1},
  pages = {4--17},
  url = {http://ids-pub.bsz-bw.de/frontdoor/index/index/docId/2222}
}

Diewald G, Lehmberg T and Smirnova E (2007), "KALI - A Diachronic Corpus for the Investigation of Grammaticalization and Semantic Change", In Proceedings of the GLDV Annual Meeting 2007, April 11-13, Tübingen.

BibTeX:

@inproceedings{Diewald2007,
  author = {Diewald, Gabriele and Lehmberg, Timm and Smirnova, Elena},
  title = {{KALI} -- A Diachronic Corpus for the Investigation of Grammaticalization and Semantic Change},
  booktitle = {Proceedings of the GLDV Annual Meeting 2007, April 11-13, Tübingen},
  year = {2007}
}

Dipper S, Hinrichs E, Schmidt T, Wagner A and Witt A (2006), "Sustainability of Linguistic Resources", In Proceedings of the LREC 2006 Satellite Workshop on "Merging and Layering Linguistic Information", Genoa 2006.

[Abstract] [BibTeX] [URL]

Abstract: This paper describes a new research initiative addressing the issue of sustainability of linguistic resources. This initiative is a cooperation between three linguistic collaborative research centres in Germany, which comprise more than 40 individual research projects altogether. These projects are involved in creating manifold language resources, especially corpora, tailored to their particular needs. The aim of the project described here is to ensure an effective and sustainable access of these data by third-party researchers beyond the termination of these projects. This goal involves a number of measures, such as the definition of a common data format to completely capture the heterogeneous information encoded in the individual corpora, the development of user-friendly and sustainably usable tools for processing (e.g. querying) the data, and the specification of common inventories of metadata and terminology. Moreover, the project aims at formulating general rules of best practice for creating, accessing, and archiving linguistic resources.

BibTeX:

@inproceedings{Dipper2006,
  author = {Dipper, Stefanie and Hinrichs, Erhard and Schmidt, Thomas and Wagner, Andreas and Witt, Andreas},
  editor = {Hinrichs, Erhard and Ide, Nancy and Palmer, Martha and Pustejovsky, James},
  title = {Sustainability of Linguistic Resources},
  booktitle = {Proceedings of the LREC 2006 Satellite Workshop on ``Merging and Layering Linguistic Information'', Genoa 2006},
  year = {2006},
  note = {EN},
  url = {http://www.exmaralda.org/files/SeitenausLREC.pdf}
}

Gabriel C (2012), "The Hamburg Corpus of Argentinean Spanish (HaCASpa).", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14, pp. 183–197. John Benjamins.

[Abstract] [BibTeX]

Abstract: The paper outlines and illustrates the design of the Hamburg Corpus of Argentinean Spanish (HaCASpa, compiled 2008−2009), which comprises oral data from two varieties of Argentinean Spanish (Buenos Aires and Neuquén, Northern Patagonia). Both varieties are characterized by prosodic features that can plausibly be traced back to the contact with Italian during the period of large streams of immigration between 1830 and 1950. After providing the reader with general information on the historical situation of Spanish-Italian bilingualism in Buenos Aires, the contribution focuses on the data types contained in the corpus and the speakers recorded. In addition, the main findings stemming from the analyses performed thus far based on the corpus are summarized.

BibTeX:

@incollection{Gabriel2012,
  author = {Gabriel, Christoph},
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {The {Hamburg Corpus of Argentinean Spanish} ({HaCASpa})},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14},
  pages = {183--197}
}

Gabriel C (2011), "Hamburg Corpus of Argentinean Spanish (HaCASpa)", Archived in Hamburger Zentrum für Sprachkorpora. Version 0.2. Publication date 2011-06-30. 6, 2011.

BibTeX:

@misc{hacaspa,
  author = {Gabriel, Christoph},
  title = {{Hamburg Corpus of Argentinean Spanish} ({HaCASpa})},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 0.2. Publication date 2011-06-30},
  year = {2011},
  note = {audio; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-5F0B-B}
}

Grillparzer NH (), "Fremdsprachlicher Akzent bei simultan bilingualen Sprechern (Deutsch-Französisch)"

BibTeX:

@unpublished{Grillparzer,
  author = {Grillparzer, Nele Hannah},
  title = {Fremdsprachlicher {Akzent} bei simultan bilingualen {Sprechern} ({Deutsch-Französisch})},
  note = {Bachelorarbeit im Studiengang Lehramt an Gymnasien zur Erlangung des wissenschaftlichen Grades eines Bachelor of Arts der Universität Hamburg eingereicht}
}

Haugh M, Ruhi Ş, Schmidt T and Wörner K (2014), "Introduction: Putting practices in spoken corpora into focus", In Best Practices for Speech Corpora in Linguistic Research, pp. 1-19. Cambridge Scholars Publishing.

BibTeX:

@incollection{Haugh2014,
  author = {Haugh, Michael and Ruhi, Şükriye and Schmidt, Thomas and Wörner, Kai},
  editor = {Haugh, Michael and Ruhi, Şükriye and Schmidt, Thomas and Wörner, Kai},
  title = {Introduction: Putting practices in spoken corpora into focus},
  booktitle = {Best Practices for Speech Corpora in Linguistic Research},
  publisher = {Cambridge Scholars Publishing},
  year = {2014},
  pages = {1--19},
  url = {http://www.cambridgescholars.com/download/sample/61765}
}

Hedeland H (2011), "Interaction of technology and methodology in building and sharing an annotated learner corpus of spoken German.", In Actas del 3 Congreso Internacional de Lingüística de Corpus. Tecnologias de la Información y las Comunicaciones: Presente y Futuro en el Análisis de Corpus.

BibTeX:

@inproceedings{Congr_Intern_2011,
  author = {Hedeland, Hanna},
  editor = {Candel Mora, Miguel Ángel and Carrió Pastor, Maria Luisa},
  title = {Interaction of technology and methodology in building and sharing an annotated learner corpus of spoken {German}},
  booktitle = {Actas del 3 Congreso Internacional de Lingüística de Corpus. Tecnologías de la Información y las Comunicaciones: Presente y Futuro en el Análisis de Corpus},
  year = {2011}
}

Hedeland H, Lehmberg T, Schmidt T and Wörner K (2014), "Multilingual Corpora at the Hamburg Centre for Language Corpora", In Best Practices for Speech Corpora in Linguistic Research, pp. 208-224. Cambridge Scholars Publishing.

BibTeX:

@incollection{Haugh2012b,
  author = {Hedeland, Hanna and Lehmberg, Timm and Schmidt, Thomas and Wörner, Kai},
  editor = {Haugh, Michael and Ruhi, Şükriye and Schmidt, Thomas and Wörner, Kai},
  title = {Multilingual Corpora at the {Hamburg Centre for Language Corpora}},
  booktitle = {Best Practices for Speech Corpora in Linguistic Research},
  publisher = {Cambridge Scholars Publishing},
  year = {2014},
  pages = {208--224},
  url = {http://www.cambridgescholars.com/download/sample/61765}
}

Hedeland H, Lehmberg T, Schmidt T and Wörner K (2011), "Multilingual Corpora at the Hamburg Centre for Language Corpora", In Multilingual Resources and Multilingual Applications. Proceedings of GSCL Conference 2011 Hamburg.

[Abstract] [BibTeX] [URL]

Abstract: We give an overview of the content and the technical background of a number of corpora which were developed in various projects of the Research Centre on Multilingualism (SFB 538) between 1999 and 2011 and which are now made available to the scientific community via the Hamburg Centre for Language Corpora.

BibTeX:

@inproceedings{GSCL_HZSK_2011,
  author = {Hedeland, Hanna and Lehmberg, Timm and Schmidt, Thomas and Wörner, Kai},
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {Multilingual Corpora at the {Hamburg Centre for Language Corpora}},
  booktitle = {Multilingual Resources and Multilingual Applications. Proceedings of GSCL Conference 2011 Hamburg},
  year = {2011},
  url = {http://www.exmaralda.org/files/Corpora_HZSK_GSCL2011.pdf}
}

Hedeland H and Schmidt T (2012), "Technological and methodological challenges in creating, annotating and sharing a learner corpus of spoken German", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14, pp. 25-46. John Benjamins.

[Abstract] [BibTeX]

Abstract: This article discusses questions concerning the creation, annotation and sharing of spoken language corpora. We use the Hamburg Map Task Corpus (HAMATAC), a small corpus in which advanced learners of German were recorded solving a map task, as an example to illustrate our main points. We first give an overview of the corpus creation and annotation process including recording, metadata documentation, transcription and semi-automatic annotation of the data. We then discuss the manual annotation of disfluencies as an example case in which many of the typical and challenging problems for data reuse – in particular the reliability of interpretative annotations – are revealed.

BibTeX:

@incollection{HSM14-HED,
  author = {Hedeland, Hanna and Schmidt, Thomas},
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {Technological and methodological challenges in creating, annotating and sharing a learner corpus of spoken {German}},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14},
  pages = {25--46}
}

Hedeland H and Wörner K (2012), "Experiences and Problems creating a CMDI profile from an existing Metadata Schema", In Proceedings of LREC-Workshop "Describing LRs with Metadata: Towards Flexibility and Interoperability in the Documentation of LR". ELRA.

BibTeX:

@inproceedings{LREC-CMDI,
  author = {Hedeland, Hanna and Wörner, Kai},
  title = {Experiences and Problems creating a {CMDI} profile from an existing Metadata Schema},
  booktitle = {Proceedings of LREC-Workshop ``Describing LRs with Metadata: Towards Flexibility and Interoperability in the Documentation of LR''},
  publisher = {ELRA},
  year = {2012},
  url = {http://www.lrec-conf.org/proceedings/lrec2012/workshops/11.LREC2012%20Metadata%20Proceedings.pdf}
}

Herkenrath A and Rehbein J (2012), "Pragmatic corpus analysis, exemplified by Turkish-German bilingual and monolingual data.", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14, pp. 123–152. John Benjamins.

[Abstract] [BibTeX]

Abstract: The paper presents a methodology for empirical multilingual data analysis that combines quantitative and qualitative research. The data is a bilingual Turkish-German and a monolingual Turkish corpus of spoken child language. The methodology proceeds in several steps: (1) description of transcribed data (PartiturEditor) and of the concepts of ‘constellation’ and ‘Evocative Field Experiment’ (EFE), (2) the methodological role of the linguistic unit ‘utterance’, its marking as ‘segment’ in transcriptions and its importance for corpus formation (CoMa), (3) search procedures and frequency assignment of the findings (EXAKT), (4) classification according to constellative features of the data, (5) contextual interpretation of the items, (6) consultation of the transcript where needed, (7) contextually based categorisation of the items resulting in an empirical determination of their varieties. The objective of the methodological stages is an empirical foundation of discourse-based linguistic analysis of multilingual corpora, which we call ‘Pragmatic Corpus Analysis’ (PCA).

BibTeX:

@incollection{Herkenrath2012,
  author = {Herkenrath, Annette and Rehbein, Jochen},
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {Pragmatic corpus analysis, exemplified by {Turkish-German} bilingual and monolingual data},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14},
  pages = {123--152}
}

Herzog G, Heid U, Trippel T, Banski P, Romary L, Schmidt T, Witt A and Eckart K (2015), "Recent Initiatives towards New Standards for Language Resources", In Proceedings of the Int. Conference of the German Society for Computational Linguistics and Language Technology, Duisburg, pp. 154-156.

BibTeX:

@inproceedings{Herzog2015,
  author = {Herzog, Gottfried and Heid, Ulrich and Trippel, Thorsten and Banski, Piotr and Romary, Laurent and Schmidt, Thomas and Witt, Andreas and Eckart, Kerstin},
  editor = {Fisseni, Bernhard and Schröder, Bernhard and Zesch, Torsten},
  title = {Recent Initiatives towards New Standards for Language Resources},
  booktitle = {Proceedings of the Int. Conference of the German Society for Computational Linguistics and Language Technology, Duisburg},
  year = {2015},
  pages = {154--156},
  url = {http://gscl2015.inf.uni-due.de/wp-content/uploads/2015/09/gscl2015-proceedings.pdf}
}

Höder S (2012), "Annotating ambiguity. Insights from a corpus-based study on syntactic change in Old Swedish", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14, pp. 245–271. John Benjamins.

[Abstract] [BibTeX]

Abstract: The synchronic and diachronic variability of historical texts poses substantial difficulties in the annotation and analysis of historical corpora. One main problem is that ongoing language change and particularly grammaticalisation phenomena lead to syntactic ambiguity. This contribution shows how such issues are dealt with in the TEI-based Hamburg Corpus of Old Swedish with Syntactic Annotation (HaCOSSA). The focus is on the development of strictly operational, explicitly defined, largely theory-neutral, language-specific and diachronically broad annotation categories.

BibTeX:

@incollection{Hoeder2012,
  author = {Höder, Steffen},
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {Annotating ambiguity. {Insights} from a corpus-based study on syntactic change in {Old Swedish}},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14},
  pages = {245--271}
}

Höder S, Wörner K and Zeevaert L (2007), "Corpus-based investigations on word order change: The case of Old Nordic", Arbeiten zur Mehrsprachigkeit, Folge B. Vol. 81, pp. 1 ff.

[Abstract] [BibTeX] [URL]

Abstract: This paper presents results from an interdisciplinary cooperation within the Collaborative Research Centre on Multilingualism. First results of this cooperation were published in an earlier paper (BAUMGARTEN et al. 2007) concentrating on an investigation of functional characteristics of coordinating elements in English, German, Old Swedish and Turkish corpora. The aim of the second part of the cooperation was to develop corpus linguistic methods in order to be able to examine word order change in subordinate clauses in older Swedish and Danish texts in comparison to Old West Norse. The starting point for the investigation was the observation that the word order in Swedish main clauses is rather stable from the earliest written sources up to contemporary Swedish, whereas in subordinate clauses, from a diachronic perspective, far-reaching changes can be observed. Starting from the hypothesis that language contact triggered this change, a comparison of an Old Swedish, an Old Danish and an Old West Norse version of the Story of Charlemagne was performed. The West Norse version almost exclusively shows verb second order and no examples of verb late order. In the Danish and the Swedish versions, verb second is also the main option, but more examples of the finite verb in a later position can be found in both texts. In our opinion it seems to be reasonable to suggest that the development of new text types based on Latin models triggered the change that can be observed in the East Norse texts.

BibTeX:

@article{Hoeder2007,
  author = {Höder, Steffen and Wörner, Kai and Zeevaert, Ludger},
  title = {Corpus-based investigations on word order change: The case of {Old Nordic}},
  journal = {Arbeiten zur Mehrsprachigkeit, Folge B},
  year = {2007},
  volume = {81},
  pages = {1 ff},
  url = {http://www.exmaralda.org/files/azm81.pdf}
}

Höder S, Wörner K and Zeevaert L (2007), "Quantitative analyses of diachronic developments of the function of subordinators: The case of Old Nordic word order.", Arbeiten zur Mehrsprachigkeit, Folge B. Vol. 81

BibTeX:

@article{Hoederetal2007,
  author = {Höder, Steffen and Wörner, Kai and Zeevaert, Ludger},
  title = {Quantitative analyses of diachronic developments of the function of subordinators: The case of {Old Nordic} word order},
  journal = {Arbeiten zur Mehrsprachigkeit, Folge B},
  year = {2007},
  volume = {81}
}

House J, Meyer B and Schmidt T (2012), "CoSi – A Corpus of Consecutive and Simultaneous Interpreting", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14 John Benjamins.

[Abstract] [BibTeX]

Abstract: This paper describes how to access and use a corpus of comparable consecutive and simultaneous interpreting (Brazilian Portuguese and German). The corpus is available free of charge. Our aim is to stimulate discussions on the use and the accessibility of corpora in interpreting studies, and, more generally, the need for corpus-based studies of interpreting.

BibTeX:

@incollection{House2012,
  author = {House, Juliane and Meyer, Bernd and Schmidt, Thomas},
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {{CoSi} – {A} Corpus of Consecutive and Simultaneous Interpreting},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14}
}

HZSK (2013), "Hamburg Modern Times Corpus (HaMoTiC)", Archived in Hamburger Zentrum für Sprachkorpora. Version 0.2. Publication date 2013-11-01. 11, 2013.

BibTeX:

@misc{hamotic,
  author = {{HZSK}},
  title = {{Hamburg Modern Times Corpus} ({HaMoTiC})},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 0.2. Publication date 2013-11-01},
  year = {2013},
  note = {audio; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-6973-9}
}

HZSK (2010), "HAMATAC - The Hamburg MapTask Corpus", Archived in Hamburger Zentrum für Sprachkorpora. Version 0.3. Publication date 2010-09-16. 9, 2010.

BibTeX:

@misc{hamatac,
  author = {{HZSK}},
  title = {{HAMATAC} -- {The} {Hamburg MapTask Corpus}},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 0.3. Publication date 2010-09-16},
  year = {2010},
  note = {audio; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-6330-A}
}

HZSK (2007), "EXMARaLDA Demo Corpus", Archived in Hamburger Zentrum für Sprachkorpora. Version 1310. Publication date 2007-11-08. 11, 2007.

BibTeX:

@misc{demo,
  author = {{HZSK}},
  title = {{EXMARaLDA} Demo Corpus},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 1310. Publication date 2007-11-08},
  year = {2007},
  note = {audio \& video; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-4F70-A}
}

Kellner B, Lehmberg T, Schröder I and Wörner K (2008), "Datenstrukturen für die Analyse regionaler Sprachvariation", In Text Resources and Lexical Knowledge, pp. 53-64. Mouton de Gruyter.

BibTeX:

@incollection{Kellner2008,
  author = {Kellner, Birgit and Lehmberg, Timm and Schröder, Ingrid and Wörner, Kai},
  editor = {Storrer, Angelika and Geyken, Alexander and Siebert, Alexander and Würzner, Kay-Michael},
  title = {Datenstrukturen für die Analyse regionaler Sprachvariation},
  booktitle = {Text Resources and Lexical Knowledge},
  publisher = {Mouton de Gruyter},
  year = {2008},
  pages = {53--64}
}

Knorr D (2015), "Kommentiertes Lernendenkorpus akademisches Schreiben (KoLaS)", Archived in Hamburger Zentrum für Sprachkorpora. Version 1.0. Publication date 2015.

BibTeX:

@misc{kolas,
  author = {Knorr, Dagmar},
  title = {{Kommentiertes Lernendenkorpus akademisches Schreiben} ({KoLaS})},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 1.0. Publication date 2015},
  year = {2015},
  url = {http://hdl.handle.net/11022/0000-0000-8DE3-1}
}

Kühl K (2012), "Ad hoc contact phenomena or established features of a contact variety? Evidence from corpus analysis", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14, pp. 199–214. John Benjamins.

[Abstract] [BibTeX]

Abstract: This contribution deals with the possibilities of distinguishing features of an established contact variety from singly occurring, transient elements using a corpus-based approach. It emphasizes the potential that lies in including different language registers (informal spoken language and formal written language) in the analysis of language contact, and hypothesizes that the register-specific establishment of contact phenomena is possible. This is shown through the example of Danish as it is used on the Faroe Islands, represented by the only two existing digitized and annotated corpora that reflect the bilingualism on the Faroe Islands.

BibTeX:

@incollection{Kuehl2012,
  author = {Karoline Kühl},
  editor = {Thomas Schmidt and Kai Wörner},
  title = {Ad hoc contact phenomena or established features of a contact variety? Evidence from corpus analysis},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14},
  pages = {199--214}
}

Kupietz M and Schmidt T (2015), "Schriftliche und mündliche Korpora am IDS als Grundlage für die empirische Forschung", In Sprachwissenschaft im Fokus. Positionsbestimmungen und Perspektiven, pp. 297-322. de Gruyter.

BibTeX:

@incollection{KupietzSchmidt2015,
  author = {Marc Kupietz and Thomas Schmidt},
  editor = {Ludwig Eichinger},
  title = {Schriftliche und mündliche Korpora am {IDS} als Grundlage für die empirische Forschung},
  booktitle = {Sprachwissenschaft im Fokus. Positionsbestimmungen und Perspektiven},
  publisher = {de Gruyter},
  year = {2015},
  pages = {297--322}
}

Kupisch T (2011), "Hamburg Adult Bilingual LAnguage", Archived in Hamburger Zentrum für Sprachkorpora. Version 0.2. Publication date 2011-06-30. 6, 2011.

BibTeX:

@misc{habla,
  author = {Kupisch, Tanja},
  title = {Hamburg {Adult} {Bilingual} {LAnguage}},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 0.2. Publication date 2011-06-30},
  year = {2011},
  month = jun,
  note = {audio; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-5C64-9}
}

Kupisch T, Barton D, Bianchi G and Stangen I (2012), "The HABLA-corpus (German-French and German-Italian)", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14, pp. 163–179. John Benjamins.

[Abstract] [BibTeX]

Abstract: The HABLA-corpus (Hamburg Adult Bilingual LAnguage) comprises data in the form of semi-structured interviews gathered in the project E11, Linguistic Aspects of Language Attrition and Second Language Acquisition in adult bilinguals (German-French and German-Italian). E11 investigated the language of adult bilinguals (2L1 speakers) who grew up in Germany, Italy or France being exposed to two languages simultaneously from birth, comparing them to advanced second language (L2) learners. In this contribution, we explain the motivation for creating the corpus and introduce the corpus design, including information about the subjects, data acquisition and labelling, quality and transcription conventions, with the purpose of providing an overview of the corpus and facilitate its use.

BibTeX:

@incollection{Kupisch2012,
  author = {Tanja Kupisch and Dagmar Barton and Giulia Bianchi and Ilse Stangen},
  editor = {Thomas Schmidt and Kai Wörner},
  title = {The {HABLA}-corpus ({German}-{French} and {German}-{Italian})},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14},
  pages = {163--179}
}

Lehmberg T, Chiarcos C, Hinrichs E, Rehm G and Witt A (2007), "Collecting Legally Relevant Metadata by Means of a Decision-Tree-Based Questionnaire System", In Proceedings of Digital Humanities 2007, June 2–8, University of Illinois, Urbana-Champaign, USA.

BibTeX:

@inproceedings{Lehmberg2007,
  author    = {Timm Lehmberg and Christian Chiarcos and Erhard Hinrichs and Georg Rehm and Andreas Witt},
  title     = {Collecting Legally Relevant Metadata by Means of a Decision-Tree-Based Questionnaire System},
  year      = 2007,
  booktitle = {Proceedings of Digital Humanities 2007, June 2–8, University of Illinois, Urbana-Champaign, USA},
  url       = {http://u-002-ssfbv001.uni-tuebingen.de/sfb441/c2/paper2/paper2.html}
}

Lehmberg T, Chiarcos C, Rehm G and Witt A (2007), "Persönlichkeitsrechtliche Fragen bei der Nutzung und Weitergabe linguistischer Daten.", In Proceedings of the GLDV Annual Meeting 2007, April 11-13, Tübingen.

BibTeX:

@inproceedings{Lehmberg2007b,
  author = {Lehmberg, Timm and Chiarcos, Christian and Rehm, Georg and Witt, Andreas},
  title = {Persönlichkeitsrechtliche Fragen bei der Nutzung und Weitergabe linguistischer Daten},
  booktitle = {Proceedings of the GLDV Annual Meeting 2007, April 11-13, Tübingen},
  year = {2007}
}

Lehmberg T, Rehm G, Witt A and Zimmermann F (2008), "Digital Text Collections, Linguistic Research Data, and Mashups: Notes on the Legal Situation.", Library Trends. Vol. 57 (1), special issue Digital Books and the Impact on Libraries, pp. 52-71.

BibTeX:

@article{Lehmberg2008,
  author = {Lehmberg, Timm and Rehm, Georg and Witt, Andreas and Zimmermann, Felix},
  title = {Digital Text Collections, Linguistic Research Data, and Mashups: Notes on the Legal Situation},
  journal = {Library Trends},
  year = {2008},
  volume = {57},
  number = {1},
  pages = {52--71},
  note = {Special issue: Digital Books and the Impact on Libraries}
}

Lehmberg T and Wörner K (2008), "Annotation Standards", In Corpus Linguistics - An international handbook. Vol. 1, pp. 484-501. Walter de Gruyter.

BibTeX:

@incollection{LehmbergWoerner2008,
  author = {Timm Lehmberg and Kai Wörner},
  editor = {Anke Lüdeling and Merja Kytö},
  title = {Annotation Standards},
  booktitle = {Corpus Linguistics - An international handbook},
  publisher = {Walter de Gruyter},
  year = {2008},
  volume = {1},
  pages = {484--501}
}

Lehmberg T and Zimmermann F (2007), "Language Corpora - Copyright - Data Protection: The Legal Point of View", In Proceedings of Digital Humanities 2007, June 2–8, University of Illinois, Urbana-Champaign, USA.

BibTeX:

@inproceedings{Lehmberg2007a,
  author    = {Timm Lehmberg and Felix Zimmermann},
  title     = {Language Corpora - Copyright - Data Protection: The Legal Point of View},
  year      = 2007,
  booktitle = {Proceedings of Digital Humanities 2007, June 2–8, University of Illinois, Urbana-Champaign, USA},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.129.2070&rep=rep1&type=pdf}
}

Lein T (2012), "Fremder Akzent in der zweiten Muttersprache: Zum Sprachverlust bei erwachsenen deutsch-französischen Sprechern". Thesis at: Universität Hamburg.

BibTeX:

@mastersthesis{Lein2012,
  author = {Lein, Tatjana},
  title  = {Fremder Akzent in der zweiten Muttersprache: Zum Sprachverlust bei erwachsenen deutsch-französischen Sprechern},
  year   = 2012,
  school = {Universität Hamburg}
}

Lleó C (2012), "Monolingual and bilingual phonoprosodic corpora of child German and child Spanish", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14, pp. 107–122. John Benjamins.

[Abstract] [BibTeX]

Abstract: This article describes two longitudinal language corpora of child German and child Spanish. One of the corpora, PAIDUS, is comprised of the utterances produced by monolingual German and monolingual Spanish children, between the ages of 1 and ca. 3 years. The German children grew up in Hamburg (Germany) and the Spanish children in Madrid (Spain). The other corpus, PhonBLA, is comprised of utterances produced by German-Spanish bilingual children, between the ages of 1 and ca. 7 years, growing up in Hamburg (Germany). The bilingual children have a Spanish-speaking mother and a German-speaking father. All corpora were collected, transcribed and analyzed within various research projects supported by the DFG, between 1986 and 2011. Several analyses of the data have been published in international journals and books (see References).

BibTeX:

@incollection{Lleo2012,
  author = {Conxita Lleó},
  editor = {Thomas Schmidt and Kai Wörner},
  title = {Monolingual and bilingual phonoprosodic corpora of child {German} and child {Spanish}},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14},
  pages = {107--122}
}

Lleó C (2011), "ALCEBLA", Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2011-06-30. 6, 2011.

BibTeX:

@misc{alcebla,
  author = {Lleó, Conxita},
  title = {{ALCEBLA}},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2011-06-30},
  year = {2011},
  month = jun,
  note = {audio; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-50DD-D}
}

Lleó C (2011), "Catalan in a bilingual context (PhonCAT)", Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2011-06-30. 6, 2011.

BibTeX:

@misc{phoncat,
  author = {Lleó, Conxita},
  title = {{Catalan} in a bilingual context ({PhonCAT})},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2011-06-30},
  year = {2011},
  month = jun,
  url = {http://hdl.handle.net/11022/0000-0000-772F-7}
}

Lleó C (2011), "PhonBLA Longitudinalstudie Hamburg", Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2011-06-30. 6, 2011.

BibTeX:

@misc{phonblalong,
  author = {Lleó, Conxita},
  title = {{PhonBLA Longitudinalstudie Hamburg}},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2011-06-30},
  year = {2011},
  month = jun,
  note = {audio; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-70CA-E}
}

Lleó C (2011), "Phon-CL2", Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2011-06-30. 6, 2011.

BibTeX:

@misc{phoncl2,
  author = {Lleó, Conxita},
  title = {{Phon-CL2}},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2011-06-30},
  year = {2011},
  month = jun,
  note = {audio; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-7D27-9}
}

Lleó C (2011), "Phonologie-Erwerb Deutsch-Spanisch als Erste Sprachen (PEDSES)", Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2011-06-30. 6, 2011.

BibTeX:

@misc{pedses,
  author = {Lleó, Conxita},
  title = {{Phonologie-Erwerb Deutsch-Spanisch als Erste Sprachen} ({PEDSES})},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2011-06-30},
  year = {2011},
  month = jun,
  note = {audio; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-6ECE-E}
}

Lleó C (2010), "Parameterfixierung im Deutschen und Spanischen (PAIDUS)", Archived in Hamburger Zentrum für Sprachkorpora. Version 0.4. Publication date 2010-09-30. 9, 2010.

BibTeX:

@misc{paidus,
  author = {Lleó, Conxita},
  title = {{Parameterfixierung im Deutschen und Spanischen} ({PAIDUS})},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 0.4. Publication date 2010-09-30},
  year = {2010},
  month = sep,
  note = {audio; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-69DD-2}
}

Lleó C, Kuchenbrandt I, Kehoe M and Trujillo C (2003), "Syllable final consonants in Spanish and German monolingual and bilingual acquisition", In (In)vulnerable Domains in Multilingualism, pp. 191-220. John Benjamins Publishing Company.

BibTeX:

@incollection{Lleoetal2003a,
  author = {Conxita Lleó and Imme Kuchenbrandt and Margaret Kehoe and Christina Trujillo},
  editor = {Natascha Müller},
  title = {Syllable final consonants in {Spanish} and {German} monolingual and bilingual acquisition},
  booktitle = {(In)vulnerable Domains in Multilingualism},
  publisher = {John Benjamins Publishing Company},
  year = {2003},
  pages = {191--220}
}

MacWhinney B, Schmidt T, Martell C, Wagner J, Wittenburg P and Hoffer E (2004), "Collaborative Commentary: Opening Up Spoken Language Databases", In Proceedings of the Language Resource and Evaluation Conference 2004, Lisbon. Paris: ELRA.

BibTeX:

@inproceedings{Schmidt2004c,
  author = {Brian MacWhinney and Thomas Schmidt and Craig Martell and Johannes Wagner and Peter Wittenburg and Eric Hoffer},
  title = {Collaborative Commentary: Opening Up Spoken Language Databases},
  booktitle = {Proceedings of the Language Resource and Evaluation Conference 2004, Lisbon},
  publisher = {ELRA},
  address = {Paris},
  year = {2004},
  note = {EN},
  url = {http://repository.cmu.edu/cgi/viewcontent.cgi?article=1176&context=psychology}
}

Marquardt K (2012), "Frage-Antwort-Sequenzen im gedolmetschten ärztlichen Anamnesegespräch". Thesis at: Universität Mannheim.

BibTeX:

@mastersthesis{Marquardt2012,
  author = {Marquardt, Katja},
  title  = {Frage-Antwort-Sequenzen im gedolmetschten ärztlichen Anamnesegespräch},
  year   = 2012,
  school = {Universität Mannheim}
}

Meißner C and Slavcheva A (2013), "Review of EXMARaLDA", Language Documentation and Conservation. Vol. 7, pp. 31-40.

[Abstract] [BibTeX] [DOI] [URL]

Abstract: EXMARaLDA is a system for creating, managing and analyzing spoken language corpora (Schmidt & Wörner 2009, Schmidt et al. 2011), developed between 2000 and 2011 at the Research Centre on Multilingualism (SFB 538) at the University of Hamburg. It is now maintained at the Hamburg Center for Speech Corpora (HZSK), and since November 2011, also in cooperation with the Archive for Spoken German (AGD) at the Institute for the German Language (IDS) in Mannheim. It comprises tools for transcribing spoken language (Partitur-Editor), managing metadata (Corpus Manager), and querying spoken language corpora (EXAKT). The software components are freely available and operate on all platforms (Windows, Linux, Macintosh). EXMARaLDA forms the basis for 23 multilingual corpora of spoken language at the Hamburg Center for Speech Corpora. Its primary scope of application covers discourse and conversation analysis, first and second language acquisition studies, and dialectology (cf. Schmidt 2009: 158). This paper reviews the software from the perspective of its application in the GeWiss project, one of several larger corpus projects that have used EXMARaLDA. As a starting point, the review will introduce the software requirements of the project, and their role in choosing the EXMARaLDA package for the creation of the GeWiss Corpus. As we worked with all three components of the software, the review will then deal in turn with the Partitur-Editor (version 1.5.1), the Corpus Manager (version 1.9), and EXAKT (version 1.1). In conclusion, some remarks concerning support and compatibility of the software will be made.

BibTeX:

@article{MeissnerSlavcheva2013,
  author = {Cordula Meißner and Adriana Slavcheva},
  title = {Review of {EXMARaLDA}},
  journal = {Language Documentation and Conservation},
  year = {2013},
  volume = {7},
  pages = {31--40},
  url = {https://scholarspace.manoa.hawaii.edu/bitstream/handle/10125/4571/Meissner.pdf?sequence=1},
  eprint = {10125/4571},
  eprinttype = {hdl}
}

Menzel W (n.d.), "Hamburg Dependency Treebank", Archived in Hamburger Zentrum für Sprachkorpora.

BibTeX:

@misc{treebank,
  author = {Menzel, Wolfgang},
  title = {Hamburg {Dependency} {Treebank}},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora},
  url = {http://hdl.handle.net/11022/0000-0000-7FC7-2}
}

Merkel S and Schmidt T (2009), "Korpora gesprochener Sprache im Netz - eine Umschau", Gesprächsforschung. Vol. 10, pp. 70-93.

BibTeX:

@article{MerkelSchmidt2009,
  author = {Merkel, Silke and Schmidt, Thomas},
  title = {Korpora gesprochener Sprache im Netz - eine Umschau},
  journal = {Gesprächsforschung},
  year = {2009},
  volume = {10},
  pages = {70--93},
  url = {http://www.gespraechsforschung-ozs.de/heft2009/px-merkel.pdf}
}

Meyer B (2010), "Consecutive and Simultaneous Interpreting (CoSi)", Archived in Hamburger Zentrum für Sprachkorpora. Version 1.1. Publication date 2010-02-26. 2, 2010.

BibTeX:

@misc{cosi,
  author = {Meyer, Bernd},
  title = {Consecutive and Simultaneous Interpreting ({CoSi})},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 1.1. Publication date 2010-02-26},
  year = {2010},
  month = feb,
  note = {audio \& video; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-5225-A}
}

Pavlidou T-S, Kapellidi C and Karafoti E (2012), "The Corpus of Spoken Greek (CSG)", In Best Practices for Speech Corpora in Linguistic Research, pp. 56-74. Cambridge Scholars Publishing.

BibTeX:

@incollection{Haugh2012c,
  author = {Theodossia-Soula Pavlidou and Charikleia Kapellidi and Eleni Karafoti},
  editor = {Michael Haugh and Şükriye Ruhi and Thomas Schmidt and Kai Wörner},
  title = {The {Corpus of Spoken Greek} ({CSG})},
  booktitle = {Best Practices for Speech Corpora in Linguistic Research},
  publisher = {Cambridge Scholars Publishing},
  year = {2012},
  pages = {56--74},
  url = {http://www.cambridgescholars.com/download/sample/61765}
}

Redder A (2002), "Professionelles Transkribieren", In Transkription – Medien/Lektüre. München, pp. 115-131. Fink.

BibTeX:

@incollection{Redder2002,
  author = {Redder, Angelika},
  editor = {Jäger, Ludwig and Stanitzek, Georg},
  title = {Professionelles Transkribieren},
  booktitle = {Transkription – Medien/Lektüre},
  publisher = {Fink},
  address = {München},
  year = {2002},
  pages = {115--131},
  note = {DE}
}

Rehbein J (2009), "Rehbein-SKOBI (Sprachliche Konnektivität bei bilingual türkisch-deutsch aufwachsenden Kindern und Jugendlichen)", Archived in Hamburger Zentrum für Sprachkorpora. Version 1.0. Publication date 2009-03-04. 3, 2009.

BibTeX:

@misc{endfasskobi,
  author = {Rehbein, Jochen},
  title = {{Rehbein-SKOBI} ({Sprachliche Konnektivität bei bilingual türkisch-deutsch aufwachsenden Kindern und Jugendlichen})},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 1.0. Publication date 2009-03-04},
  year = {2009},
  month = mar,
  note = {audio; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-535D-B}
}

Rehbein J, Schmidt T, Meyer B, Watzke F and Herkenrath A (2004), "Handbuch für das computergestützte Transkribieren nach HIAT", Arbeiten zur Mehrsprachigkeit, Folge B. Vol. 56, pp. 1 ff.

BibTeX:

@article{Rehbein2004,
  author = {Rehbein, Jochen and Schmidt, Thomas and Meyer, Bernd and Watzke, Franziska and Herkenrath, Annette},
  title = {Handbuch für das computergestützte Transkribieren nach {HIAT}},
  journal = {Arbeiten zur Mehrsprachigkeit, Folge B},
  year = {2004},
  volume = {56},
  pages = {1 ff},
  note = {DE},
  url = {http://www.exmaralda.org/files/azm_56.pdf}
}

Rehm G, Schonefeld O, Witt A, Chiarcos C and Lehmberg T (2008), "A Web-Platform for Preserving, Exploring, Visualising and Querying Linguistic Corpora and other Resources", In SEPLN 2008 – 24th Edition of the Conference of the Spanish Society for Natural Language Processing, September 10–12, Madrid, Spain.

BibTeX:

@inproceedings{Rehm2008a,
  author    = {Georg Rehm and Oliver Schonefeld and Andreas Witt and Christian Chiarcos and Timm Lehmberg},
  title     = {A Web-Platform for Preserving, Exploring, Visualising and Querying Linguistic Corpora and other Resources},
  year      = 2008,
  booktitle = {SEPLN 2008 – 24th Edition of the Conference of the Spanish Society for Natural Language Processing, September 10–12, Madrid, Spain}
}

Rehm G, Schonefeld O, Witt A, Chiarcos C and Lehmberg T (2008), "SPLICR: A Sustainability Platform for Linguistic Corpora and Resources", In Konferenz zur Verarbeitung natürlicher Sprache (KONVENS 2008), September 30–October 02, Berlin, Germany.

BibTeX:

@inproceedings{Rehm2008,
  author = {Rehm, Georg and Schonefeld, Oliver and Witt, Andreas and Chiarcos, Christian and Lehmberg, Timm},
  title = {{SPLICR}: A Sustainability Platform for Linguistic Corpora and Resources},
  booktitle = {Konferenz zur Verarbeitung natürlicher Sprache ({KONVENS} 2008), September 30–October 02, Berlin, Germany},
  year = {2008}
}

Rehm G, Schonefeld O, Witt A, Lehmberg T, Chiarcos C, Bechara H, Eishold F, Evang K, Leshtanska M, Savkov A and Stark M (2008), "The Metadata-Database of a Next Generation Sustainability Web-Platform for Language Resources", In Proceedings of LREC 2008. May, 2008.

BibTeX:

@inproceedings{Rehm2008b,
  author = {Georg Rehm and Oliver Schonefeld and Andreas Witt and Timm Lehmberg and Christian Chiarcos and Hanan Bechara and Florian Eishold and Kilian Evang and Magdalena Leshtanska and Aleksandar Savkov and Matthias Stark},
  title = {The Metadata-Database of a Next Generation Sustainability Web-Platform for Language Resources},
  booktitle = {Proceedings of {LREC} 2008},
  year = {2008},
  month = may,
  url = {http://georg-re.hm/pdf/LREC-2008-Metadata.pdf}
}

Reimer E, Trevisan B, Eraßme D, Schmidt T and Jakobs E-M (2015), "Annotating Modality Interdependencies", In International Conference of the German Society for Computational Linguistics and Language Technology. Proceedings of the Conference, Sep 30 – Oct 2, 2015, University of Duisburg-Essen, Germany, pp. 101–111.

BibTeX:

@inproceedings{ReimerTrevisanErassmeetal.2015,
  author = {Eva Reimer and Bianka Trevisan and Denise Eraßme and Thomas Schmidt and Eva-Maria Jakobs},
  editor = {Bernhard Fisseni and Bernhard Schröder and Torsten Zesch},
  title = {Annotating Modality Interdependencies},
  booktitle = {International Conference of the German Society for Computational Linguistics and Language Technology. Proceedings of the Conference, Sep 30 – Oct 2, 2015, University of Duisburg-Essen, Germany},
  year = {2015},
  pages = {101--111}
}

Reimer E, Trevisan B, Eraßme D, Schmidt T and Jakobs E-M (2015), "Annotating Modality Interdependencies", In Proceedings of the Int. Conference of the German Society for Computational Linguistics and Language Technology, Duisburg, pp. 110-111.

BibTeX:

@inproceedings{Reimeretal2015a,
  author = {Reimer, Eva and Trevisan, Bianka and Eraßme, Denise and Schmidt, Thomas and Jakobs, Eva-Maria},
  editor = {Fisseni, Bernhard and Schröder, Bernhard and Zesch, Torsten},
  title = {Annotating Modality Interdependencies},
  booktitle = {Proceedings of the Int. Conference of the German Society for Computational Linguistics and Language Technology, Duisburg},
  year = {2015},
  pages = {110--111},
  url = {http://gscl2015.inf.uni-due.de/wp-content/uploads/2015/09/gscl2015-proceedings.pdf}
}

Rohlfing K, Loehr D, Duncan S, Brown A, Franklin A, Kimbara I, Milde J-T, Parrill F, Rose T, Schmidt T, Sloetjes H, Thies A and Wellinghoff S (2006), "Comparison of multimodal annotation tools — workshop report", Gesprächsforschung. Vol. 7, pp. 99-123.

BibTeX:

@article{Rohlfing2006,
  author = {Rohlfing, Katharina and Loehr, Daniel and Duncan, Susan and Brown, Amanda and Franklin, Amy and Kimbara, Irene and Milde, Jan-Torsten and Parrill, Fey and Rose, Travis and Schmidt, Thomas and Sloetjes, Han and Thies, Alexandra and Wellinghoff, Sandra},
  title = {Comparison of multimodal annotation tools — workshop report},
  journal = {Gesprächsforschung},
  year = {2006},
  volume = {7},
  pages = {99--123},
  note = {EN},
  url = {http://www.gespraechsforschung-ozs.de/heft2006/tb-rohlfing.pdf}
}

Rothweiler M (2011), "Turkish-German Successive-Bilinguals Corpus (TÜ_DE_cL2 Hamburg)", Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2011-06-30. 6, 2011.

BibTeX:

@misc{tuedecl2,
  author = {Rothweiler, Monika},
  title = {{Turkish-German} Successive-Bilinguals Corpus ({TÜ\_DE\_cL2} {Hamburg})},
  howpublished = {Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2011-06-30},
  year = {2011},
  month = jun,
  note = {video; EXMARaLDA},
  url = {http://hdl.handle.net/11022/0000-0000-7D90-1}
}

Ruhi Ş, Hatipoğlu Ç, Işık-Güler H and Eröz-Tuğa B (2010), "A Guideline for Transcribing Conversations for the Construction of Spoken Turkish Corpora Using EXMARaLDA and HIAT", August, 2010.

BibTeX:

@booklet{Ruhi_et_al_2010,
  author = {Ruhi, Şükriye and Hatipoğlu, Çiler and Işık-Güler, Hale and Eröz-Tuğa, Betil},
  title = {A Guideline for Transcribing Conversations for the Construction of Spoken {Turkish} Corpora Using {EXMARaLDA} and {HIAT}},
  year = {2010},
  month = aug,
  url = {http://std.metu.edu.tr/en/}
}

Ruhi Ş and Taş EEI (2014), "Constructing General and Dialectal Spoken Corpora for Language Variation Research: Two Case Studies from Turkish", In Best Practices for Speech Corpora in Linguistic Research, pp. 36-55. Cambridge Scholars Publishing.

BibTeX:

@incollection{Haugh2012d,
  author = {Şükriye Ruhi and E. Eda Işık Taş},
  editor = {Michael Haugh and Şükriye Ruhi and Thomas Schmidt and Kai Wörner},
  title = {Constructing General and Dialectal Spoken Corpora for Language Variation Research: Two Case Studies from {Turkish}},
  booktitle = {Best Practices for Speech Corpora in Linguistic Research},
  publisher = {Cambridge Scholars Publishing},
  year = {2014},
  pages = {36--55},
  url = {http://www.cambridgescholars.com/download/sample/61765}
}

Schaeffer-Lacroix E (2009), "Corpus numériques et production écrite en langue étrangère une recherche avec des apprenants d'allemand". Thesis at: Université Sorbonne Nouvelle.

BibTeX:

@phdthesis{Schaeffer-Lacroix2009,
  author = {Schaeffer-Lacroix, Eva},
  title  = {Corpus numériques et production écrite en langue étrangère une recherche avec des apprenants d'allemand},
  year   = 2009,
  school = {Université Sorbonne Nouvelle}
}

Schäfer A (), "Sprachmischungen bei bilingual aufwachsenden Kindern: Eine Analyse der Stellung der attributiven Adjektive in der Nominalphrase bei französisch-deutsch bilingualem Erstspracherwerb"

BibTeX:

@unpublished{Balbach,
  author = {Schäfer, Anja},
  note   = {Seminararbeit; Humboldt-Universität zu Berlin; Institut für deutsche Literatur; Fachbereich Linguistik},
  title  = {Sprachmischungen bei bilingual aufwachsenden Kindern: Eine Analyse der Stellung der attributiven Adjektive in der Nominalphrase bei französisch-deutsch bilingualem Erstspracherwerb}
}

Schindler K and Sasaki F (2001), "Bielefelder Workshop "Transkriptionen: Standards, Tools und gesprächsanalytische Anforderungen"", Gesprächsforschung. Vol. 2, pp. 58-66.

BibTeX:

@article{Schindler2001,
  author = {Schindler, Kirsten and Sasaki, Felix},
  title = {Bielefelder Workshop {``Transkriptionen: Standards, Tools und gesprächsanalytische Anforderungen''}},
  journal = {Gesprächsforschung},
  year = {2001},
  volume = {2},
  pages = {58--66},
  note = {DE},
  url = {http://www.gespraechsforschung-ozs.de/heft2001/tb-sasaki.pdf}
}

Schmidt T (2015), "Good practices in the compilation of FOLK (Research and Teaching Corpus of Spoken German).", In Compilation and Annotation of Spoken Corpora: Towards Best Practice. (Special issue of the International Journal of Corpus Linguistics). John Benjamins Publishing Company.

BibTeX:

@incollection{Schmidt2015,
  author = {Thomas Schmidt},
  editor = {Kirk, J. and Andersen, G.},
  title = {Good practices in the compilation of {FOLK} ({Research and Teaching Corpus of Spoken German})},
  booktitle = {Compilation and Annotation of Spoken Corpora: Towards Best Practice. (Special issue of the International Journal of Corpus Linguistics)},
  publisher = {John Benjamins Publishing Company},
  year = {2015}
}

Schmidt T (2014), "(More) Common Ground for Processing Spoken Language Corpora?", In Best Practices for Speech Corpora in Linguistic Research, pp. 249-265. Cambridge Scholars Publishing.

BibTeX:

@incollection{Haugh2012a,
  author = {Thomas Schmidt},
  editor = {Michael Haugh and Şükriye Ruhi and Thomas Schmidt and Kai Wörner},
  title = {(More) Common Ground for Processing Spoken Language Corpora?},
  booktitle = {Best Practices for Speech Corpora in Linguistic Research},
  publisher = {Cambridge Scholars Publishing},
  year = {2014},
  pages = {249--265},
  url = {http://www.cambridgescholars.com/download/sample/61765}
}

Schmidt T (2014), "Gesprächskorpora und Gesprächsdatenbanken am Beispiel von FOLK und DGD.", Gesprächsforschung. Vol. 15, pp. 196-233.

[Abstract] [BibTeX] [URL]

Abstract: Dieser Beitrag stellt das Forschungs- und Lehrkorpus Gesprochenes Deutsch (FOLK) und die Datenbank für Gesprochenes Deutsch (DGD) als Instrumente gesprächsanalytischer Arbeit vor. Nach einer allgemeinen Einführung in FOLK und DGD im zweiten Abschnitt werden im dritten Abschnitt die methodischen Beziehungen zwischen Korpuslinguistik und Gesprächsforschung und die Herausforderungen, die sich bei der Begegnung dieser beiden Herangehensweisen an authentisches Sprachmaterial stellen, kurz skizziert. Der vierte Abschnitt illustriert dann ausgehend vom Beispiel der Formel ich sag mal, wie eine korpus- und datenbankgesteuerte Analyse zur Untersuchung von Gesprächsphänomenen beitragen kann.

BibTeX:

@article{GO_FOLKER_DGD_2014,
  author = {Thomas Schmidt},
  title = {Gesprächskorpora und Gesprächsdatenbanken am Beispiel von {FOLK} und {DGD}},
  journal = {Gesprächsforschung},
  year = {2014},
  volume = {15},
  pages = {196--233},
  url = {http://www.gespraechsforschung-ozs.de/fileadmin/dateien/heft2014/px-schmidt.pdf}
}

Schmidt T (2014), "The Database for Spoken German – DGD2", In Proceedings of LREC, pp. 1451-1457.

[Abstract] [BibTeX] [URL]

Abstract: The Database for Spoken German (Datenbank für Gesprochenes Deutsch, DGD2, http://dgd.ids-mannheim.de) is the central platform for publishing and disseminating spoken language corpora from the Archive of Spoken German (Archiv für Gesprochenes Deutsch, AGD, http://agd.ids-mannheim.de) at the Institute for the German Language in Mannheim. The corpora contained in the DGD2 come from a variety of sources, some of them in-house projects, some of them external projects. Most of the corpora were originally intended either for research into the (dialectal) variation of German or for studies in conversation analysis and related fields. The AGD has taken over the task of permanently archiving these resources and making them available for reuse to the research community. To date, the DGD2 offers access to 19 different corpora, totalling around 9000 speech events, 2500 hours of audio recordings or 8 million transcribed words. This paper gives an overview of the data made available via the DGD2, of the technical basis for its implementation, and of the most important functionalities it offers. The paper concludes with information about the users of the database and future plans for its development.

BibTeX:

@inproceedings{LREC_2014_DGD,
  author = {Thomas Schmidt},
  title = {The Database for Spoken {German} – {DGD2}},
  booktitle = {Proceedings of {LREC}},
  year = {2014},
  pages = {1451--1457},
  url = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/171_Paper.pdf}
}

Schmidt T (2014), "The Research and Teaching Corpus of Spoken German – FOLK", In Proceedings of LREC, pp. 383-387.

[Abstract] [BibTeX] [URL]

Abstract: FOLK is the "Forschungs- und Lehrkorpus Gesprochenes Deutsch (FOLK)" (eng.: research and teaching corpus of spoken German). The project has set itself the aim of building a corpus of German conversations which a) covers a broad range of interaction types in private, institutional and public settings, b) is sufficiently large and diverse and of sufficient quality to support different qualitative and quantitative research approaches, c) is transcribed, annotated and made accessible according to current technological standards, and d) is available to the scientific community on a sound legal basis and without unnecessary restrictions of usage. This paper gives an overview of the corpus design, the strategies for acquisition of a diverse range of interaction data, and the corpus construction workflow from recording via transcription and annotation to dissemination.

BibTeX:

@inproceedings{LREC_2014_FOLK,
  author = {Schmidt, Thomas},
  title = {The {Research and Teaching Corpus of Spoken German} – {FOLK}},
  booktitle = {Proceedings of LREC},
  year = {2014},
  pages = {383--387},
  url = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/290_Paper.pdf}
}

Schmidt T (2012), "EXMARaLDA and the FOLK tools", In Proceedings of LREC. ELRA.

BibTeX:

@inproceedings{LREC-FOLKER,
  author = {Schmidt, Thomas},
  title = {{EXMARaLDA} and the {FOLK} tools},
  booktitle = {Proceedings of LREC},
  publisher = {ELRA},
  year = {2012},
  url = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/529_Paper.pdf}
}

Schmidt T (2011), "A TEI-based Approach to Standardising Spoken Language Transcription", Journal of the Text Encoding Initiative. Vol. 1, pp. 1-22.

BibTeX:

@article{Schmidt2011,
  author = {Schmidt, Thomas},
  title = {A {TEI}-based Approach to Standardising Spoken Language Transcription},
  journal = {Journal of the Text Encoding Initiative},
  year = {2011},
  volume = {1},
  pages = {1--22},
  url = {http://jtei.revues.org/142}
}

Schmidt T (2010), "Another extension of the stylesheet metaphor – Visualising multi-layer annotations as musical scores", In Linguistic modelling of information and Markup Languages. Dordrecht , pp. 23-44. Springer.

BibTeX:

@incollection{Schmidt2008a,
  author = {Schmidt, Thomas},
  editor = {Witt, Andreas and Metzing, Dieter},
  title = {Another extension of the stylesheet metaphor – Visualising multi-layer annotations as musical scores},
  booktitle = {Linguistic modelling of information and Markup Languages},
  publisher = {Springer},
  year = {2010},
  pages = {23--44},
  note = {EN}
}

Schmidt T (2010), "Das Kicktionary : Beziehungen im Wortschatz am Beispiel der Fußballsprache", Der Deutschunterricht. Vol. 3, pp. 17-25.

BibTeX:

@article{KicktionaryDeutschunterricht,
  author = {Schmidt, Thomas},
  title = {Das {Kicktionary} : Beziehungen im Wortschatz am Beispiel der Fußballsprache},
  journal = {Der Deutschunterricht},
  year = {2010},
  volume = {3},
  pages = {17--25},
  url = {http://ids-pub.bsz-bw.de/frontdoor/index/index/docId/2302}
}

Schmidt T (2010), "EXMARaLDA : un système pour la constitution et l'exploitation de corpus oraux.", In Pour une épistémologie de la sociolinguistique. Actes du colloque international de Montpellier 10-12 décembre 2009. , pp. 319-327. Lambert-Lucas.

BibTeX:

@incollection{ActesMontpellier,
  author = {Schmidt, Thomas},
  editor = {Boyer, Henri},
  title = {{EXMARaLDA} : un système pour la constitution et l'exploitation de corpus oraux},
  booktitle = {Pour une épistémologie de la sociolinguistique. Actes du colloque international de Montpellier 10-12 décembre 2009},
  publisher = {Lambert-Lucas},
  year = {2010},
  pages = {319--327},
  url = {http://ids-pub.bsz-bw.de/frontdoor/index/index/docId/2237}
}

Schmidt T (2010), "Linguistic Tool Development between Community Practices and Technology Standards", In Proceedings of the LREC Workshop Language Resource and Language Technology Standards – state of the art, emerging needs, and future developments. Valletta, Malta, may, 2010. European Language Resources Association (ELRA).

BibTeX:

@inproceedings{SCHMIDT_2010_LREC_Standards,
  author    = {Schmidt, Thomas},
  title     = {Linguistic Tool Development between Community Practices and Technology Standards},
  editor    = {Budin, Gerhard and Declerck, Thierry and Romary, Laurent and Wittenburg, Peter},
  booktitle = {Proceedings of the LREC Workshop Language Resource and Language Technology Standards – state of the art, emerging needs, and future developments},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2010},
  url       = {http://www.exmaralda.org/files/LREC_Standards.pdf}
}

Schmidt T (2009), "Creating and Working with Spoken Language Corpora in EXMARaLDA", In LULCL II: Lesser Used Languages & Computer Linguistics II. , pp. 151-164.

BibTeX:

@inproceedings{Schmidt2009Bozen,
  author = {Schmidt, Thomas},
  editor = {Lyding, Verena},
  title = {Creating and Working with Spoken Language Corpora in {EXMARaLDA}},
  booktitle = {LULCL II: Lesser Used Languages \& Computer Linguistics II},
  year = {2009},
  pages = {151--164},
  url = {http://www.eurac.edu/Org/LanguageLaw/Multilingualism/Projects/LULCL_II_proceedings.htm}
}

Schmidt T (2009), "Kicktionary", In Flickflack, Foul und Tsukahara: Der Sport und seine Sprache. Vol. 10(Thema Deutsch) Duden-Verlag.

BibTeX:

@incollection{Flick_2009,
  author = {Schmidt, Thomas},
  editor = {Burkhardt, Armin and Schlobinski, Peter},
  title = {Kicktionary},
  booktitle = {Flickflack, Foul und Tsukahara: Der Sport und seine Sprache},
  publisher = {Duden-Verlag},
  year = {2009},
  series = {Thema Deutsch},
  volume = {10},
  url = {http://ids-pub.bsz-bw.de/frontdoor/index/index/docId/2241}
}

Schmidt T (2009), "The Kicktionary - A multilingual lexical resource of football language", In Multilingual FrameNets in Computational Lexicography. de Gruyter.

BibTeX:

@incollection{Schmidt2008d,
  author = {Schmidt, Thomas},
  editor = {Boas, Hans C.},
  title = {The {Kicktionary} - A multilingual lexical resource of football language},
  booktitle = {Multilingual FrameNets in Computational Lexicography},
  publisher = {de Gruyter},
  year = {2009},
  url = {http://ids-pub.bsz-bw.de/frontdoor/index/index/docId/2348}
}

Schmidt T (2008), "GAT: Aspekte der computertechnischen Umsetzbarkeit". Thesis at: Universität Hamburg / IDS Mannheim.

BibTeX:

@techreport{GAT2008,
  author = {Schmidt, Thomas},
  title = {{GAT}: Aspekte der computertechnischen Umsetzbarkeit},
  institution = {Universität Hamburg / IDS Mannheim},
  year = {2008},
  url = {http://www.exmaralda.org/files/GAT_Analyse2.pdf}
}

Schmidt T (2008), "The Kicktionary revisited", In Text Resources and Lexical Knowledge. , pp. 239-251. Mouton de Gruyter.

BibTeX:

@incollection{Kick:Rev2008,
  author = {Schmidt, Thomas},
  editor = {Storrer, Angelika and Geyken, Alexander and Siebert, Alexander and Würzner, Kay-Michael},
  title = {The {Kicktionary} revisited},
  booktitle = {Text Resources and Lexical Knowledge},
  publisher = {Mouton de Gruyter},
  year = {2008},
  pages = {239--251}
}

Schmidt T (2008), "The Kicktionary: Combining corpus linguistics and lexical semantics for a multilingual football dictionary", In The Linguistics of Football. Vol. 38, pp. 11-23. Gunter Narr.

BibTeX:

@incollection{Schmidt2008c,
  author = {Schmidt, Thomas},
  editor = {Lavric, Eva and Pisek, Gerhard and Skinner, Andrew and Stadler, Wolfgang},
  title = {The {Kicktionary}: Combining corpus linguistics and lexical semantics for a multilingual football dictionary},
  booktitle = {The Linguistics of Football},
  publisher = {Gunter Narr},
  year = {2008},
  volume = {38},
  pages = {11--23}
}

Schmidt T (2007), "The Kicktionary: A multilingual resource of the language of football", In Data Structures for Linguistic Resources and Applications. , pp. 189-196. Gunter Narr.

BibTeX:

@incollection{Schmidt2007a,
  author = {Schmidt, Thomas},
  editor = {Rehm, Georg and Witt, Andreas and Lemnitzer, Lothar},
  title = {The {Kicktionary}: A multilingual resource of the language of football},
  booktitle = {Data Structures for Linguistic Resources and Applications},
  publisher = {Gunter Narr},
  year = {2007},
  pages = {189--196},
  url = {http://www.kicktionary.de/RESOURCES/schmidt2007.pdf}
}

Schmidt T (2007), "Transkriptionskonventionen für die computergestützte gesprächsanalytische Transkription", Gesprächsforschung. Vol. 8, pp. 229-241.

BibTeX:

@article{Schmidt2007,
  author = {Schmidt, Thomas},
  title = {Transkriptionskonventionen für die computergestützte gesprächsanalytische Transkription},
  journal = {Gesprächsforschung},
  year = {2007},
  volume = {8},
  pages = {229--241},
  url = {http://www.gespraechsforschung-ozs.de/heft2007/px-schmidt.pdf}
}

Schmidt T (2006), "Interfacing Lexical and Ontological Information in a Multilingual Soccer FrameNet", In Proceedings of OntoLex 2006. , pp. 24-27.

BibTeX:

@inproceedings{Schmidt2006b,
  author = {Schmidt, Thomas},
  title = {Interfacing Lexical and Ontological Information in a Multilingual Soccer {FrameNet}},
  booktitle = {Proceedings of OntoLex 2006},
  year = {2006},
  pages = {24--27},
  url = {http://www.kicktionary.de/RESOURCES/schmidt2006.pdf}
}

Schmidt T (2005), "Computergestützte Transkription - Modellierung und Visualisierung gesprochener Sprache mit texttechnologischen Mitteln" Frankfurt a. M. Vol. 7 Peter Lang.

BibTeX:

@book{Schmidt2005c,
  author    = {Schmidt, Thomas},
  title     = {Computergestützte Transkription - Modellierung und Visualisierung gesprochener Sprache mit texttechnologischen Mitteln},
  volume    = {7},
  publisher = {Peter Lang},
  year      = {2005},
  url       = {http://www.exmaralda.org/files/Diss_INHALT.pdf},
  note      = {DE}
}

Schmidt T (2005), "Datenarchive für die Gesprächsforschung. Perspektiven, Probleme und Lösungsansätze", Gesprächsforschung. Vol. 6, pp. 103-126.

BibTeX:

@article{Schmidt2005a,
  author = {Schmidt, Thomas},
  title = {Datenarchive für die Gesprächsforschung. Perspektiven, Probleme und Lösungsansätze},
  journal = {Gesprächsforschung},
  year = {2005},
  volume = {6},
  pages = {103--126},
  note = {DE},
  url = {http://www.gespraechsforschung-ozs.de/heft2005/px-schmidt.pdf}
}

Schmidt T (2005), "EXMARaLDA und die Datenbank "Mehrsprachigkeit" - Konzepte und praktische Erfahrungen", In Heterogeneity in Focus: Creating and Using Linguistic Databases. Potsdam Vol. 2, pp. 21-42. Universitätsverlag Potsdam.

[Abstract] [BibTeX] [URL]

Abstract: This paper presents some concepts and principles used in the development of a database of multilingual spoken discourse at the University of Hamburg. The emphasis of the first part is on general considerations for the handling of heterogeneous data sets: After showing that diversity in transcription data is partly conceptually and partly technologically motivated, it is argued that the processing of transcription corpora should be approached via a three-level architecture which separates form (application) and content (data) on the one hand, and logical and physical data structures on the other hand. Such an architecture does not only pave the way for modern text-technological approaches to linguistic data processing, it can also help to decide where and how a standardization in the work with heterogeneous data is possible and desirable and where it would run counter to the needs of the research community. It is further argued that, in order to ensure user acceptance, new solutions developed in this approach must take care not to abandon established concepts too quickly. The focus of the second part is on some practical experiences with users and technologies gained in the four years’ project work. Concerning the practical development work, the value of open standards like XML and Unicode is emphasized and some limitations of the “platform-independent” JAVA technology are indicated. With respect to users of the EXMARaLDA system, a predominantly conservative attitude towards technological innovations in transcription corpus work can be stated: individual users tend to stick to known functionalities and are reluctant to adapt themselves to the new possibilities. Furthermore, an active commitment to cooperative corpus work still seems to be the exception rather than the rule.
It is concluded that technological innovations can contribute their share to a progress in the work with heterogeneous linguistic data, but that they will have to be supplemented, in the long run, with an adequate methodological reflection and the creation of an appropriate infrastructure.

BibTeX:

@incollection{Schmidt2005d,
  author = {Schmidt, Thomas},
  editor = {Dipper, Stefanie and Stede, Manfred},
  title = {{EXMARaLDA} und die Datenbank "Mehrsprachigkeit" - Konzepte und praktische Erfahrungen},
  booktitle = {Heterogeneity in Focus: Creating and Using Linguistic Databases},
  publisher = {Universitätsverlag Potsdam},
  year = {2005},
  volume = {2},
  pages = {21--42},
  note = {DE},
  url = {http://www.exmaralda.org/files/Paper_Potsdam.pdf}
}

Schmidt T (2005), "Modellbildung und Modellierungsparadigmen in der computergestützten Korpusanalyse", In Sprachtechnologie, mobile Kommunikation und linguistische Ressourcen. Beiträge zur GLDV-Tagung 2005 in Bonn. Frankfurt a. M. Vol. 8

BibTeX:

@inproceedings{Schmidt2005b,
  author    = {Schmidt, Thomas},
  title     = {Modellbildung und Modellierungsparadigmen in der computergestützten Korpusanalyse},
  editor    = {Fisseni, Bernhard and Schmitz, Hans-Christian and Schröder, Bernhard and Wagner, Petra},
  booktitle = {Sprachtechnologie, mobile Kommunikation und linguistische Ressourcen. Beiträge zur GLDV-Tagung 2005 in Bonn},
  volume    = {8},
  year      = {2005},
  note      = {DE}
}

Schmidt T (2005), "Time-based data models and the Text Encoding Initiative's guidelines for transcription of speech", Arbeiten zur Mehrsprachigkeit, Folge B. Vol. 62, pp. 1 ff.

BibTeX:

@article{Schmidt2005e,
  author = {Schmidt, Thomas},
  title = {Time-based data models and the {Text Encoding Initiative}'s guidelines for transcription of speech},
  journal = {Arbeiten zur Mehrsprachigkeit, Folge B},
  year = {2005},
  volume = {62},
  pages = {1 ff},
  note = {EN},
  url = {http://www.exmaralda.org/files/SFB_AzM62.pdf}
}

Schmidt T (2004), "EXMARaLDA - ein Modellierungs- und Visualisierungsverfahren für die computergestützte Transkription gesprochener Sprache", In Proceedings of Konvens 2004. Wien Vol. 5

[Abstract] [BibTeX] [URL]

Abstract: This paper attempts a new look at computer assisted transcription as it is commonly practised within the fields of discourse analysis and language acquisition studies. The first part proposes a bridge between discourse analytical methodology and text technological methods with the concept of modelling as its central idea. The second part demonstrates the EXMARaLDA system, a set of formats and tools for computer assisted transcription that builds on the ideas developed in the first part and implements them in a way that can lead to significant improvement in current research practice.

BibTeX:

@inproceedings{Schmidt2004,
  author = {Schmidt, Thomas},
  editor = {Buchberger, Ernst},
  title = {{EXMARaLDA} - ein Modellierungs- und Visualisierungsverfahren für die computergestützte Transkription gesprochener Sprache},
  booktitle = {Proceedings of Konvens 2004},
  year = {2004},
  volume = {5},
  note = {DE},
  url = {http://www.exmaralda.org/files/Konvens_Paper.pdf}
}

Schmidt T (2004), "EXMARaLDA - ein System zur computergestützten Diskurstranskription", In Automatische Textanalyse. Systeme und Methoden zur Annotation und Analyse natürlichsprachlicher Texte. Wiesbaden , pp. 203-218. Verlag für Sozialwissenschaften.

BibTeX:

@incollection{Schmidt2004b,
  author = {Schmidt, Thomas},
  editor = {Mehler, Alexander and Lobin, Henning},
  title = {{EXMARaLDA} - ein System zur computergestützten Diskurstranskription},
  booktitle = {Automatische Textanalyse. Systeme und Methoden zur Annotation und Analyse natürlichsprachlicher Texte},
  publisher = {Verlag für Sozialwissenschaften},
  year = {2004},
  pages = {203--218},
  note = {DE}
}

Schmidt T (2004), "Transcribing and annotating spoken language with EXMARaLDA", In Proceedings of the LREC-Workshop on XML based richly annotated corpora, Lisbon 2004. Paris ELRA.

[Abstract] [BibTeX] [URL]

Abstract: This paper describes EXMARaLDA, an XML-based framework for the construction, dissemination and analysis of corpora of spoken language transcriptions. Departing from a prototypical example of a “partitur” (musical score) transcription, the EXMARaLDA “single timeline, multiple tiers” data model and format is presented alongside with the EXMARaLDA Partitur-Editor, a tool for inputting and visualizing such data. This is followed by a discussion of the interaction of EXMARaLDA with other frameworks and tools that work with similar data models. Finally, this paper presents an extension of the “single timeline, multiple tiers” data model and describes its application within the EXMARaLDA system.

BibTeX:

@inproceedings{Schmidt2004a,
  author = {Schmidt, Thomas},
  title = {Transcribing and annotating spoken language with {EXMARaLDA}},
  booktitle = {Proceedings of the LREC-Workshop on XML based richly annotated corpora, Lisbon 2004},
  publisher = {ELRA},
  year = {2004},
  note = {EN},
  url = {http://www.exmaralda.org/files/Paper_LREC.pdf}
}

Schmidt T (2003), "Visualising Linguistic Annotation as Interlinear Text", Arbeiten zur Mehrsprachigkeit, Folge B. Vol. 46, pp. 1 ff.

BibTeX:

@article{Schmidt2003a,
  author  = {Schmidt, Thomas},
  title   = {Visualising Linguistic Annotation as Interlinear Text},
  journal = {Arbeiten zur Mehrsprachigkeit, Folge B},
  volume  = {46},
  pages   = {1 ff.},
  year    = {2003},
  url     = {http://www.exmaralda.org/files/Visualising-final.pdf},
  note    = {EN}
}

Schmidt T (2002), "EXMARaLDA - ein System zur Diskurstranskription auf dem Computer", Arbeiten zur Mehrsprachigkeit, Folge B. Vol. 34, pp. 1 ff.

[Abstract] [BibTeX] [URL]

Abstract: EXMARaLDA is a system for computer transcription of spoken discourse that is being developed at the SFB ‚Mehrsprachigkeit’ as a basis of a multilingual discourse database into which the transcriptions in use at the SFB will be integrated at a later point in time. The present paper describes the theoretical background of the development – a formal model of discourse transcription based on the annotation graph formalism (Bird/Liberman (2001)) – and its practical realisation in the form of an XML-based data format and several tools for input, output and manipulation of the data.

BibTeX:

@article{Schmidt2002b,
  author = {Schmidt, Thomas},
  title = {{EXMARaLDA} - ein System zur Diskurstranskription auf dem Computer},
  journal = {Arbeiten zur Mehrsprachigkeit, Folge B},
  year = {2002},
  volume = {34},
  pages = {1 ff.},
  note = {DE},
  url = {http://www1.uni-hamburg.de/exmaralda/Daten/4D-Literatur/AZM.pdf}
}

Schmidt T (2002), "EXMARaLDA - un système de transcription computationelle comme base d'un corpus de la langue parlée multilingue", In Journée d’Étude de l’ATALA. Paris

BibTeX:

@inproceedings{Schmidt2002c,
  author = {Schmidt, Thomas},
  title = {{EXMARaLDA} - un système de transcription computationelle comme base d'un corpus de la langue parlée multilingue},
  booktitle = {Journée d’Étude de l’ATALA},
  year = {2002},
  note = {FR},
  url = {http://www1.uni-hamburg.de/exmaralda/files/Montpellier.pdf}
}

Schmidt T (2002), "Gesprächstranskription auf dem Computer: das System EXMARaLDA", Gesprächsforschung. Vol. 3, pp. 1-23.

BibTeX:

@article{Schmidt2002a,
  author = {Schmidt, Thomas},
  title = {Gesprächstranskription auf dem Computer: das System {EXMARaLDA}},
  journal = {Gesprächsforschung},
  year = {2002},
  volume = {3},
  pages = {1--23},
  note = {DE},
  url = {http://www.gespraechsforschung-ozs.de/heft2002/px-schmidt.pdf}
}

Schmidt T (2002), "Stellungnahme zu Wolfgang Schneiders Artikel "Annotate in Transkriptionen aus DV-technischer Sicht"", Gesprächsforschung. Vol. 3, pp. 237-249.

BibTeX:

@article{Schmidt2002,
  author = {Schmidt, Thomas},
  title = {Stellungnahme zu Wolfgang Schneiders Artikel "Annotate in Transkriptionen aus {DV}-technischer Sicht"},
  journal = {Gesprächsforschung},
  year = {2002},
  volume = {3},
  pages = {237--249},
  note = {DE},
  url = {http://www.gespraechsforschung-ozs.de/heft2002/px-schmidt-2.pdf}
}

Schmidt T (2001), "The transcription system EXMARaLDA: An application of the annotation graph formalism as the Basis of a Database of Multilingual Spoken Discourse", In Proceedings of the IRCS Workshop On Linguistic Databases, 11-13 December 2001. Philadelphia , pp. 219-227. Institute for Research in Cognitive Science, University of Pennsylvania.

[Abstract] [BibTeX] [URL]

Abstract: This paper describes EXMARaLDA, a system for computer transcription of spoken discourse developed and used by the SFB "Mehrsprachigkeit" at the university of Hamburg. EXMARaLDA consists of several DTDs for XML coding of transcription data and some input and output tools for these formats. Apart from being a transcription system in its own right, EXMARaLDA also plays the role of a mediator between older existing data formats at the SFB and between these formats and a planned database of multilingual spoken discourse.

BibTeX:

@inproceedings{Schmidt2001,
  author = {Schmidt, Thomas},
  editor = {Bird, Steven and Buneman, Peter and Liberman, Mark},
  title = {The transcription system {EXMARaLDA}: An application of the annotation graph formalism as the Basis of a Database of Multilingual Spoken Discourse},
  booktitle = {Proceedings of the IRCS Workshop On Linguistic Databases, 11-13 December 2001},
  publisher = {Institute for Research in Cognitive Science, University of Pennsylvania},
  year = {2001},
  pages = {219--227},
  note = {EN},
  url = {http://www.exmaralda.org/files/IRCS_Paper.pdf}
}

Schmidt T (forthcoming), "Grundzüge von EXMARaLDA – einem System zur computergestützten Erstellung und Auswertung von Korpora gesprochener Sprache", In Bausteine diskursanalytischen Wissens. Berlin de Gruyter.

BibTeX:

@incollection{Schmidt2008b,
  author = {Schmidt, Thomas},
  editor = {Rehbein, Jochen and Kameyama, Shinichi},
  title = {Grundzüge von {EXMARaLDA} – einem System zur computergestützten Erstellung und Auswertung von Korpora gesprochener Sprache},
  booktitle = {Bausteine diskursanalytischen Wissens},
  publisher = {de Gruyter},
  year = {forthcoming},
  note = {DE},
  url = {http://www1.uni-hamburg.de/exmaralda/files/Backstein.pdf}
}

Schmidt T and Bennöhr J (2008), "Rescuing Legacy Data", Language Documentation and Conservation. Vol. 2(1), pp. 109-129.

BibTeX:

@article{SchmidtBennoehr2008,
  author = {Schmidt, Thomas and Bennöhr, Jasmine},
  title = {Rescuing Legacy Data},
  journal = {Language Documentation and Conservation},
  year = {2008},
  volume = {2},
  number = {1},
  pages = {109--129},
  url = {http://hdl.handle.net/10125/1803}
}

Schmidt T, Chiarcos C, Lehmberg T, Rehm G, Witt A and Hinrichs E (2006), "Avoiding Data Graveyards: From Heterogeneous Data Collected in Multiple Research Projects to Sustainable Linguistic Resources", In Proceedings of the E-MELD 2006 Workshop on Digital Language Documentation: Tools and Standards: The State of the Art. Lansing, Michigan

[Abstract] [BibTeX] [URL]

Abstract: This paper describes a new research initiative addressing the issue of sustainability of linguistic resources. The initiative is a cooperation between three collaborative research centres in Germany – the SFB 441 “Linguistic Data Structures” in Tübingen, the SFB 538 “Multilingualism” in Hamburg, and the SFB 632 “Information Structure” in Potsdam/Berlin. The aim of the project is to develop methods for sustainable archiving of the diverse bodies of linguistic data used at the three sites. In the first half of the paper, the data handling solutions developed so far at the three centres are briefly introduced. This is followed by an assessment of their commonalities and differences and of what these entail for the work of the new joint initiative. The second part then sketches seven areas of open questions with respect to sustainable data handling and gives a more detailed account of two of them – integration of linguistic terminologies and development of best practice guidelines.

BibTeX:

@inproceedings{Schmidt2006,
  author    = {Schmidt, Thomas and
               Chiarcos, Christian and
               Lehmberg, Timm and
               Rehm, Georg and
               Witt, Andreas and
               Hinrichs, Erhard},
  title     = {Avoiding Data Graveyards: From Heterogeneous Data Collected in Multiple Research Projects to Sustainable Linguistic Resources},
  booktitle = {Proceedings of the E-MELD 2006 Workshop on Digital Language Documentation: Tools and Standards: The State of the Art},
  year      = {2006},
  url       = {http://www.exmaralda.org/files/EMELD_final.pdf},
  note      = {EN}
}

Schmidt T, Duncan S, Ehmer O, Hoyt J, Kipp M, Magnusson M, Rose T and Sloetjes H (2009), "An Exchange Format for Multimodal Annotations", In Multimodal Corpora. , pp. 207-221. Springer.

BibTeX:

@incollection{MultiModalSpringer,
  author = {Schmidt, Thomas and Duncan, Susan and Ehmer, Oliver and Hoyt, Jeffrey and Kipp, Michael and Magnusson, Magnus and Rose, Travis and Sloetjes, Han},
  editor = {Kipp, Michael and Martin, Jean-Claude and Paggio, P. and Heylen, D.},
  title = {An Exchange Format for Multimodal Annotations},
  booktitle = {Multimodal Corpora},
  publisher = {Springer},
  year = {2009},
  pages = {207--221},
  url = {http://www.springer.com/computer/computer+imaging/book/978-3-642-04792-3}
}

Schmidt T, Duncan S, Ehmer O, Hoyt J, Kipp M, Magnusson M, Rose T and Sloetjes H (2008), "An exchange format for multimodal annotations", In Proceedings of the Language and Evaluation Conference 2008.

BibTeX:

@inproceedings{Schmidtetal2008,
  author = {Schmidt, Thomas and Duncan, Susan and Ehmer, Oliver and Hoyt, Jeffrey and Kipp, Michael and Magnusson, Magnus and Rose, Travis and Sloetjes, Han},
  title = {An exchange format for multimodal annotations},
  booktitle = {Proceedings of the Language and Evaluation Conference 2008},
  year = {2008}
}

Schmidt T, Elenius K and Trilsbeek P (2010), "Multimedia Corpora (Media encoding and annotation)". Thesis at: CLARIN.

BibTeX:

@techreport{CLARIN,
  author = {Schmidt, Thomas and Elenius, Kjell and Trilsbeek, Paul},
  title = {Multimedia Corpora (Media encoding and annotation)},
  institution = {CLARIN},
  year = {2010},
  url = {http://www.exmaralda.org/files/CLARIN_Standards.pdf}
}

Schmidt T, Geyken A and Storrer A (2008), "Refining and Exploiting the Structural Markup of the eWDG", In Proceedings of the XIII EURALEX International Congress.

BibTeX:

@inproceedings{Schmidt_Geyken_Storrer_2008,
  author = {Schmidt, Thomas and Geyken, Alexander and Storrer, Angelika},
  title = {Refining and Exploiting the Structural Markup of the {eWDG}},
  booktitle = {Proceedings of the XIII EURALEX International Congress},
  year = {2008}
}

Schmidt T and Schütte W (2010), "FOLKER: An Annotation Tool for Efficient Transcription of Natural, Multi-party Interaction", In Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10). Valletta, Malta, May, 2010. European Language Resources Association (ELRA).

BibTeX:

@inproceedings{SCHMIDT10.18,
  author = {Schmidt, Thomas and Schütte, Wilfried},
  editor = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Rosner, Mike and Tapias, Daniel},
  title = {{FOLKER}: An Annotation Tool for Efficient Transcription of Natural, Multi-party Interaction},
  booktitle = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)},
  publisher = {European Language Resources Association (ELRA)},
  year = {2010},
  url = {http://www.exmaralda.org/files/LREC_Folker.pdf}
}

Schmidt T and Wörner K (2014), "EXMARaLDA", In Handbook on Corpus Phonology. , pp. 402-419. Oxford University Press.

BibTeX:

@incollection{OUPEXMARaLDA,
  author = {Schmidt, Thomas and Wörner, Kai},
  editor = {Durand, Jacques and Gut, Ulrike and Kristoffersen, Gjert},
  title = {{EXMARaLDA}},
  booktitle = {Handbook on Corpus Phonology},
  publisher = {Oxford University Press},
  year = {2014},
  pages = {402--419},
  url = {http://ukcatalogue.oup.com/product/9780199571932.do}
}

Schmidt T and Wörner K (2012), "Introduction", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14, pp. ix – xi. John Benjamins.

BibTeX:

@incollection{HSM14-INT,
  author = {Schmidt, Thomas and Wörner, Kai},
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {Introduction},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14},
  pages = {ix--xi}
}

Schmidt T and Wörner K (2009), "EXMARaLDA – Creating, analysing and sharing spoken language corpora for pragmatic research", Pragmatics. Vol. 19(4), pp. 565-582.

BibTeX:

@article{SchmidtWoerner2008,
  author = {Schmidt, Thomas and Wörner, Kai},
  title = {{EXMARaLDA} – Creating, analysing and sharing spoken language corpora for pragmatic research},
  journal = {Pragmatics},
  year = {2009},
  volume = {19},
  number = {4},
  pages = {565--582}
}

Schmidt T and Wörner K (2005), "Erstellen und Analysieren von Gesprächskorpora mit EXMARaLDA", Gesprächsforschung. Vol. 6, pp. 171-195.

[Abstract] [BibTeX] [URL]

Abstract: Dieser Aufsatz gibt einen Überblick über EXMARaLDA, ein System aus Datenmodell, Datenformaten und Software-Werkzeugen zum computergestützten Erstellen und Analysieren von Korpora gesprochener Sprache. Der Schwerpunkt der Darstellung liegt auf der Nutzung der verschiedenen Softwarewerkzeuge – ein Partitur-Editor zum Erstellen von Transkriptionen, ein Corpus-Manager zum Erstellen und Verwalten von Korpora und ein Suchwerkzeug zum Auswerten solcher Korpora – für gesprächsanalytische Zwecke.

BibTeX:

@article{Schmidt2005,
  author = {Schmidt, Thomas and Wörner, Kai},
  title = {Erstellen und Analysieren von Gesprächskorpora mit {EXMARaLDA}},
  journal = {Gesprächsforschung},
  year = {2005},
  volume = {6},
  pages = {171--195},
  note = {DE},
  url = {http://www.gespraechsforschung-ozs.de/heft2005/px-woerner.pdf}
}

Schmidt T, Wörner K, Hedeland H and Lehmberg T (2013), "Leitfaden zur Beurteilung von Aufbereitungsaufwand und Nachnutzbarkeit von Korpora gesprochener Sprache". Thesis at: Hamburger Zentrum für Sprachkorpora, Universität Hamburg/Archiv für Gesprochenes Deutsch, IDS Mannheim.

BibTeX:

@techreport{Schmidt2013,
  author = {Schmidt, Thomas and Wörner, Kai and Hedeland, Hanna and Lehmberg, Timm},
  title = {Leitfaden zur Beurteilung von Aufbereitungsaufwand und Nachnutzbarkeit von Korpora gesprochener Sprache},
  institution = {Hamburger Zentrum für Sprachkorpora, Universität Hamburg/Archiv für Gesprochenes Deutsch, IDS Mannheim},
  year = {2013},
  url = {https://corpora.uni-hamburg.de/pdf/Leitfaden_Aufbereitungsaufwand_und_Nachnutzbarkeit_von_Korpora.pdf}
}

Schmidt T, Wörner K, Hedeland H and Lehmberg T (2011), "New and future developments in EXMARaLDA", In Multilingual Resources and Multilingual Applications. Proceedings of GSCL Conference 2011 Hamburg.

[Abstract] [BibTeX] [URL]

Abstract: We present some recent and planned future developments in EXMARaLDA, a system for creating, managing, analysing and publishing spoken language corpora. The new functionality concerns the areas of transcription and annotation, corpus management, query mechanisms, interoperability and corpus deployment. Future work is planned in the areas of automatic annotation, standardisation and workflow management.

BibTeX:

@inproceedings{EXMARaLDA_GSCL_2011,
  author = {Schmidt, Thomas and Wörner, Kai and Hedeland, Hanna and Lehmberg, Timm},
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {New and future developments in {EXMARaLDA}},
  booktitle = {Multilingual Resources and Multilingual Applications. Proceedings of GSCL Conference 2011 Hamburg},
  year = {2011},
  url = {http://www.exmaralda.org/files/Exmaralda_GSCL2011.pdf}
}

Schneider W (2002), "Annotationsstrukturen in Transkripten. DV-technische Strukturanforderungen für Annotate exemplifiziert an EXMARaLDA", Gesprächsforschung. Vol. 3, pp. 192-236.

BibTeX:

@article{Schneider2002,
  author = {Schneider, Wolfgang},
  title = {Annotationsstrukturen in Transkripten. {DV}-technische Strukturanforderungen für Annotate exemplifiziert an {EXMARaLDA}},
  journal = {Gesprächsforschung},
  year = {2002},
  volume = {3},
  pages = {192--236},
  note = {DE},
  url = {http://www.gespraechsforschung-ozs.de/heft2002/px-schneider.pdf}
}

Schneider W (2002), "Kritische Stellungnahme zur Darstellung DV-technischer Aspekte in Norbert Dittmars Buch „Transkription“", Gesprächsforschung. Vol. 3, pp. 38-43.

BibTeX:

@article{Schneider2002a,
  author = {Schneider, Wolfgang},
  title = {Kritische Stellungnahme zur Darstellung DV-technischer Aspekte in Norbert Dittmars Buch „Transkription“},
  journal = {Gesprächsforschung},
  year = {2002},
  volume = {3},
  pages = {38--43},
  note = {DE},
  url = {http://www.gespraechsforschung-ozs.de/heft2002/ko-schneider.pdf}
}

Schneider W (2001), "Der Transkriptionseditor HIAT-DOS", Gesprächsforschung. Vol. 2, pp. 29-33.

BibTeX:

@article{Schneider2001,
  author = {Schneider, Wolfgang},
  title = {Der Transkriptionseditor {HIAT-DOS}},
  journal = {Gesprächsforschung},
  year = {2001},
  volume = {2},
  pages = {29--33},
  note = {DE},
  url = {http://www.gespraechsforschung-ozs.de/heft2001/px-schneider2.pdf}
}

Schröder I (2011), "Sprachvariation in Norddeutschland", Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2011-01-01.

BibTeX:

@unpublished{sin,
  author = {Schröder, Ingrid},
  title = {Sprachvariation in Norddeutschland},
  note = {Archived in Hamburger Zentrum für Sprachkorpora. Version 0.1. Publication date 2011-01-01},
  year = {2011},
  url = {http://hdl.handle.net/11022/0000-0000-7EE3-3}
}

Slavcheva A and Meißner C (2014), "Building and Maintaining the GeWiss Corpus: Perspectives on the Construction, Sustainability and Further Enrichment of Spoken Corpora, A Showcase", In Best Practices for Speech Corpora in Linguistic Research, pp. 20-35. Cambridge Scholars Publishing.

BibTeX:

@incollection{Haugh2012e,
  author = {Slavcheva, Adriana and Meißner, Cordula},
  editor = {Haugh, Michael and Ruhi, Şükriye and Schmidt, Thomas and Wörner, Kai},
  title = {Building and Maintaining the {GeWiss} Corpus: Perspectives on the Construction, Sustainability and Further Enrichment of Spoken Corpora, A Showcase},
  booktitle = {Best Practices for Speech Corpora in Linguistic Research},
  publisher = {Cambridge Scholars Publishing},
  year = {2014},
  pages = {20--35},
  url = {http://www.cambridgescholars.com/download/sample/61765}
}

Stein D (2012), "Multi-Word Expressions in the Spanish Bhagavad Gita, Extracted with Local Grammars Based on Semantic Classes", In Proceedings to the LREC'2012 Workshop: LRE-Rel Language Resources and Evaluation for Religious Texts, pp. 88-93.

BibTeX:

@inproceedings{mwelrec2012,
  author = {Stein, Daniel},
  title = {Multi-Word Expressions in the {Spanish} {Bhagavad Gita}, Extracted with Local Grammars Based on Semantic Classes},
  booktitle = {Proceedings to the LREC'2012 Workshop: LRE-Rel Language Resources and Evaluation for Religious Texts},
  year = {2012},
  pages = {88--93}
}

Stift U-M and Schmidt T (2014), "Mündliche Korpora am IDS: Vom Deutschen Spracharchiv zur Datenbank für Gesprochenes Deutsch", In Ansichten und Einsichten. 50 Jahre Institut für Deutsche Sprache, pp. 360-375. Institut für Deutsche Sprache.

BibTeX:

@incollection{StiftSchmidt2014,
  author = {Stift, Ulf-Michael and Schmidt, Thomas},
  editor = {{Institut für Deutsche Sprache}},
  title = {Mündliche Korpora am {IDS}: Vom Deutschen Spracharchiv zur Datenbank für Gesprochenes Deutsch},
  booktitle = {Ansichten und Einsichten. 50 Jahre Institut für Deutsche Sprache},
  publisher = {Institut für Deutsche Sprache},
  year = {2014},
  pages = {360--375},
  url = {http://ids-pub.bsz-bw.de/frontdoor/index/index/docId/2477}
}

Ulloa MS, Lleó C and Sanchez IG (2012), "Corpora of spoken Spanish by simultaneous and successive German-Spanish bilingual and Spanish monolingual children", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14, pp. 97–106. John Benjamins.

[Abstract] [BibTeX]

Abstract: This article describes a database of Spanish recorded speech comprised of four corpora. The corpora contain cross-sectional data of Spanish spoken in contact with German. The first corpus, ALCE-BLA (Bilingual Language Acquisition at school age), is comprised of the utterances of 23 Spanish-German simultaneous bilingual children living in Germany and attending the Spanish complementary school at the first level. The second corpus, Phon-cL2, contains the utterances of 15 German children who have learned (or are learning) Spanish after the age of 2;0. The third corpus, Madrid-PhonBLA, contains utterances of 71 Spanish-German simultaneous bilingual children from Madrid (Spain). The fourth corpus, PhonMAS, contains utterances of monolingual Spanish children, who have been recorded with the purpose to be compared with the bilingual corpora.

BibTeX:

@incollection{Ulloa2012,
  author = {Marta Saceda Ulloa and Conxita Lleó and Izarbe Garcia Sanchez},
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {Corpora of spoken {Spanish} by simultaneous and successive {German-Spanish} bilingual and {Spanish} monolingual children},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14},
  pages = {97--106}
}

Westpfahl S and Schmidt T (2013), "POS für(s) FOLK – Part of Speech Tagging des Forschungs- und Lehrkorpus Gesprochenes Deutsch", Journal for Language Technology and Computational Linguistics, pp. 139-156.

[Abstract] [BibTeX] [URL]

Abstract: Im Rahmen des FOLK-Projekts (Forschungs- und Lehrkorpus Gesprochenes Deutsch), das am Institut für Deutsche Sprache (IDS) ein großes wissenschaftsöffentliches Gesprächskorpus aufbaut, soll mit Hilfe des TreeTaggers (SCHMID 1995) und des Stuttgart-Tübingen-Tagsets (STTS), (SCHILLER ET AL. 1999) ein automatisiertes Part-of-Speech-Tagging (POS-Tagging) für Spontansprache ermöglicht werden. Zuerst nur auf FOLK angewendet, soll dieser Tagger später auch für weitere Korpora spontansprachlicher Daten in der Datenbank für Gesprochenes Deutsch (DGD), (INSTITUT FÜR DEUTSCHE SPRACHE) genutzt werden. Da das Forschungs- und Lehrkorpus kontinuierlich ausgebaut wird, muss das POS-Tagging aus Effizienzgründen mittelfristig vollautomatisch erfolgen. Dabei wird eine Fehlerquote von unter 5 Prozent angestrebt.

BibTeX:

@article{POS_FOLK,
  author = {Westpfahl, Swantje and Schmidt, Thomas},
  title = {{POS} für(s) {FOLK} – Part of Speech Tagging des Forschungs- und Lehrkorpus Gesprochenes Deutsch},
  journal = {Journal for Language Technology and Computational Linguistics},
  year = {2013},
  pages = {139--156},
  url = {http://ids-pub.bsz-bw.de/frontdoor/index/index/docId/2223}
}

Witt A, Rehm G, Hinrichs E, Lehmberg T and Stegmann J (2009), "SusTEInability of Linguistic Resources through Feature Structures", Literary and Linguistic Computing. Vol. 24(3), pp. 363-372.

BibTeX:

@article{Witt2009,
  author = {Witt, Andreas and Rehm, Georg and Hinrichs, Erhard and Lehmberg, Timm and Stegmann, Jens},
  title = {{SusTEInability} of Linguistic Resources through Feature Structures},
  journal = {Literary and Linguistic Computing},
  year = {2009},
  volume = {24},
  number = {3},
  pages = {363--372}
}

Witt A, Rehm G, Lehmberg T and Hinrichs E (2007), "Mapping Multi-Rooted Trees from a Sustainable Exchange Format to TEI Feature Structures", In TEI@20: 20 Years of Supporting the Digital Humanities. The 20th Anniversary Text Encoding Initiative Consortium Members’ Meeting. October 31–November 3, University of Maryland, College Park, USA.

BibTeX:

@inproceedings{Witt2007,
  author = {Witt, Andreas and Rehm, Georg and Lehmberg, Timm and Hinrichs, Erhard},
  title = {Mapping Multi-Rooted Trees from a Sustainable Exchange Format to {TEI} Feature Structures},
  booktitle = {TEI@20: 20 Years of Supporting the Digital Humanities. The 20th Anniversary Text Encoding Initiative Consortium Members’ Meeting. October 31–November 3, University of Maryland, College Park, USA},
  year = {2007}
}

Wörner K (2012), "Finding the balance between strict defaults and total openness: Collecting and managing metadata for spoken language corpora with the EXMARaLDA Corpus Manager", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14, pp. 383-400. John Benjamins.

[Abstract] [BibTeX]

Abstract: The paper takes a look at existing metadata schemes for transcriptions of spoken language as well as written texts and emphasizes their advantages and disadvantages. It introduces the metadata model of EXMARaLDA, which has an implementation in the EXMARaLDA Corpus Manager (Coma). The paper justifies the decisions that led to a data model that does not presuppose many metadata items (thus risking inconsistencies) and relies on XML files (thus potentially sacrificing performance).

BibTeX:

@incollection{HSM14-WOE,
  author = {Wörner, Kai},
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {Finding the balance between strict defaults and total openness: Collecting and managing metadata for spoken language corpora with the {EXMARaLDA} {Corpus Manager}},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14},
  pages = {383--400}
}

Wörner K (2010), "A Tool for Feature-Structure Stand-Off-Annotation on Transcriptions of Spoken Discourse", In Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10). Valletta, Malta, May, 2010. European Language Resources Association (ELRA).

BibTeX:

@inproceedings{WRNER10.69,
  author = {Wörner, Kai},
  editor = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Rosner, Mike and Tapias, Daniel},
  title = {A Tool for {Feature-Structure} {Stand-Off-Annotation} on Transcriptions of Spoken Discourse},
  booktitle = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)},
  publisher = {European Language Resources Association (ELRA)},
  year = {2010},
  month = may,
  url = {http://www.exmaralda.org/files/LREC_Sextant.pdf}
}

Wörner K (2010), "Werkzeuge zur flachen Annotation von Transkriptionen gesprochener Sprache". Thesis at: Universität Bielefeld. http://bieson.ub.uni-bielefeld.de/volltexte/2010/1669/

BibTeX:

@phdthesis{Woerner2010,
  author = {Wörner, Kai},
  title  = {Werkzeuge zur flachen Annotation von Transkriptionen gesprochener Sprache},
  school = {Universität Bielefeld},
  year   = 2010,
  url    = {http://bieson.ub.uni-bielefeld.de/volltexte/2010/1669/pdf/diss_gold.pdf},
}

Wörner K, Witt A, Rehm G and Dipper S (2006), "Modelling Linguistic Data Structures", In Proceedings of the Extreme Markup Languages 2006. Montréal, Canada

[Abstract] [BibTeX] [URL]

Abstract: Linguistic corpora have been annotated by means of SGML-based markup languages for almost 20 years. We can, very roughly, differentiate between three distinct evolutionary stages of markup technologies. (1) Originally, single SGML tree-based document instances were deemed sufficient for the representation of linguistic structures. (2) Linguists began to realize that alternatives and extensions to the traditional model are needed. Formalisms such as, for example, NITE were proposed: the NITE Object Model (NOM) consists of multi-rooted trees. (3) We are now on the threshold of the third evolutionary stage: even NITE's very flexible approach is not suited for all linguistic purposes. As some structures, such as these, cannot be modeled by multi-rooted trees, an even more flexible approach is needed in order to provide a generic annotation format that is able to represent genuinely arbitrary linguistic data structures.

BibTeX:

@inproceedings{Woerner2006,
  author    = {Wörner, Kai and Witt, Andreas and Rehm, Georg and Dipper, Stefanie},
  title     = {Modelling Linguistic Data Structures},
  booktitle = {Proceedings of the Extreme Markup Languages 2006},
  year      = 2006,
  url       = {http://conferences.idealliance.org/extreme/html/2006/Witt01/EML2006Witt01.html},
  note      = {EN},
}

Zinsmeister H and Breckle M (2012), "A multilingual corpus of spoken learner German and learner English", In Multilingual Corpora and Multilingual Corpus Analysis. Vol. 14, pp. 71–96. John Benjamins.

[Abstract] [BibTeX]

Abstract: The ALesKo learner corpus is a small-scale comparable corpus consisting of two subcorpora: annotated essays by advanced Chinese learners of German and comparable essays by German native speakers. The motivation for its compilation was the investigation of discourse-related phenomena such as local coherence in second-language acquisition of German. After introducing how the texts were compiled and annotated, the article focuses on quantitative studies at the token level. We discuss problems of tokenisation and part-of-speech tagging and compare the inventory of the two subcorpora in terms of frequently used N-grams and lexical richness, among other aspects. We conclude the article by describing possible applications of the study in foreign language acquisition research and language teaching.

BibTeX:

@incollection{Zinsmeister2012,
  author = {Zinsmeister, Heike and Breckle, Margit},
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {A multilingual corpus of spoken learner {German} and learner {English}},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14},
  pages = {71--96}
}

(2014), "Best Practices for Speech Corpora in Linguistic Research", In Best Practices for Speech Corpora in Linguistic Research, pp. 1-19. Cambridge Scholars Publishing.

BibTeX:

@book{Haugh2012f,
  editor = {Haugh, Michael and Ruhi, Şükriye and Schmidt, Thomas and Wörner, Kai},
  title = {Best Practices for Speech Corpora in Linguistic Research},
  booktitle = {Best Practices for Speech Corpora in Linguistic Research},
  publisher = {Cambridge Scholars Publishing},
  year = {2014},
  pages = {1--19},
  url = {http://www.cambridgescholars.com/download/sample/61765}
}

(2012), "Best Practices for Speech Corpora in Linguistic Research"

BibTeX:

@proceedings{LREC-WORKSHOP,
  editor = {Haugh, Michael and Ruhi, Şükriye and Schmidt, Thomas and Wörner, Kai},
  title = {Best Practices for Speech Corpora in Linguistic Research},
  year = {2012}
}

(2012), "Multilingual Corpora and Multilingual Corpus Analysis". Vol. 14. John Benjamins.

BibTeX:

@book{HSM14,
  editor = {Schmidt, Thomas and Wörner, Kai},
  title = {Multilingual Corpora and Multilingual Corpus Analysis},
  publisher = {John Benjamins},
  year = {2012},
  volume = {14}
}

Hier bekommen Sie die Bibliographie als Download.