malach_pubs.bib

@INPROCEEDINGS{WB_05_Interface_Search,
  AUTHOR = {W. Bainbridge and D. W. Oard and R. W. White},
  TITLE = {An Interface to Search Human Movements Based on Geographic and Chronological
	Metadata},
  BOOKTITLE = {Proceedings of the 28th Annual International ACM SIGIR Conference
	on Research and Development in Information Retrieval},
  YEAR = {2005},
  ABSTRACT = {Historians and scholars can better understand historic events by studying
	the geographic and chronological activity of individuals who witnessed
	them. A lack of adequate tools to help users study these activities
	can hinder the process of learning and discovery. In this paper
	we present an interface to address this problem that contains three
	components: a map, a timeline, and a text representation of a survivor's
	movements. These components simultaneously provide query input (where
	users can specify their needs) and dynamic results display (where
	users can immediately see the effect of their decisions). The results
	of a pilot study show that users reacted positively to the interface.},
  OWNER = {dkw},
  PDF = {pubs/WB_05_Interf_Search.pdf},
  TIMESTAMP = {2006.07.17}
}

@INPROCEEDINGS{ZB_06_pygmy,
  AUTHOR = {Z. Band and R. W. White},
  TITLE = {Pygmy{B}rowse: A Small Screen Tree Browser},
  BOOKTITLE = {{CHI} '06: {CHI} '06 Extended Abstracts on Human Factors in Computing
	Systems},
  YEAR = {2006},
  PAGES = {514--519},
  ADDRESS = {Montr{\'e}al, Qu{\'e}bec, Canada},
  PUBLISHER = {ACM Press, New York, NY, USA},
  ABSTRACT = {We present PygmyBrowse, a browser that allows users to navigate a
	tree data structure in a limited amount of display space. A pilot
	evaluation of PygmyBrowse was conducted, and results suggest that
	it reduces task completion times and increases user satisfaction
	over two alternative node-link tree browsers.},
  DOI = {10.1145/1125451.1125562},
  OWNER = {dkw},
  PDF = {pubs/ZB_06_pygmy.pdf},
  TIMESTAMP = {2006.08.01}
}

@INPROCEEDINGS{WB_06_Min_Bayes_Risk,
  AUTHOR = {W. Byrne},
  TITLE = {Minimum {B}ayes Risk Estimation and Decoding in Large Vocabulary
	Continuous Speech Recognition},
  BOOKTITLE = {Proceedings of the Institute of Electronics, Information, and Communication
	Engineers, Japan -- Special Section on Statistical Modeling for Speech
	Processing, E89-D(3)},
  YEAR = {2006},
  ADDRESS = {Japan},
  MONTH = mar,
  ABSTRACT = {Minimum Bayes risk estimation and decoding strategies based on lattice
	segmentation techniques can be used to refine large vocabulary continuous
	speech recognition systems through the estimation of the parameters
	of the underlying hidden Markov models and through the identification
	of smaller recognition tasks which provides the opportunity to incorporate
	novel modeling and decoding procedures in LVCSR. These techniques
	are discussed in the context of going `beyond HMMs', showing in particular
	that this process of subproblem identification makes it possible
	to train and apply small-domain binary pattern classifiers, such
	as Support Vector Machines, to large vocabulary continuous speech
	recognition.},
  OWNER = {dkw},
  PDF = {pubs/WB_06_Min_Bayes_Risk_Decod.pdf},
  TIMESTAMP = {2006.07.15}
}

@INPROCEEDINGS{WB_04_Min_Bayes_Risk,
  AUTHOR = {W. Byrne},
  TITLE = {Minimum {B}ayes Risk Estimation and Decoding in Large Vocabulary
	Continuous Speech Recognition},
  BOOKTITLE = {Proceedings of the ATR Workshop -- Beyond HMMs},
  YEAR = {2004},
  ADDRESS = {Kyoto, Japan},
  MONTH = dec,
  ABSTRACT = {Minimum risk estimation and decoding strategies based on lattice segmentation
	techniques can be used to refine large vocabulary continuous speech
	recognition systems through the estimation of the parameters of
	the underlying hidden Markov models and through the identification
	of smaller recognition tasks which provides the opportunity to incorporate
	novel modeling and decoding procedures in LVCSR. These techniques
	are discussed in the context of going beyond HMMs.},
  OWNER = {dkw},
  PDF = {pubs/WB_04_Min_Bay_BeyondHMMs.pdf},
  TIMESTAMP = {2006.07.15}
}

@ARTICLE{malach_sptrans03,
  AUTHOR = {W. Byrne and D. Doermann and M. Franz and S. Gustman and J. Hajic
	and D. W. Oard and M. Picheny and J. Psutka and B. Ramabhadran and
	D. Soergel and T. Ward and Wei-Jing Zhu},
  TITLE = {Automatic Recognition of Spontaneous Speech for Access to Multilingual
	Oral History Archives},
  JOURNAL = {{IEEE} Transactions on Speech and Audio Processing, Special Issue
	on Spontaneous Speech Processing},
  YEAR = {2004},
  VOLUME = {12},
  PAGES = {420--435},
  NUMBER = {4},
  MONTH = jul,
  ABSTRACT = {The MALACH project has the goal of developing the technologies needed
	to facilitate access to large collections of spontaneous speech.
	Its aim is to dramatically improve the state of the art in key Automatic
	Speech Recognition (ASR), Natural Language Processing (NLP) technologies
	for use in large-scale retrieval systems. The project leverages
	a unique collection of oral history interviews with survivors of
	the Holocaust that has been assembled and extensively annotated
	by the Survivors of the Shoah Visual History Foundation.
	
	
	This paper describes the collection, 116,000 hours of interviews in
	32 languages, and the way in which system requirements have been
	discerned through user studies. It discusses ASR methods for very
	difficult speech (heavily accented, emotional, and elderly spontaneous
	speech), including transcription to create training data and methods
	for language modeling and speaker adaptation. Results are presented
	for English and Czech. NLP results are presented for named entity
	tagging, topic segmentation, and supervised topic classification,
	and the architecture of an integrated search system that uses these
	results is described.}
}

@MISC{malach_asist_2002,
  AUTHOR = {D. Soergel and S. Gustman and M. Kornbluh and B. Ramabhadran
	and J. Goldman},
  TITLE = {Panel Discussion on Access to Large Spoken Archives: Uses and Technology},
  HOWPUBLISHED = {Proceedings of the 65th ASIS\&T Annual Meeting, Philadelphia, PA},
  MONTH = nov,
  YEAR = {2002},
  NOTE = {D. Soergel, moderator. Medford, NJ: Information Today. pp. 469--470},
  ABSTRACT = {With recent advances in information technology, digital archiving
	is emerging as an important and practical method for capturing the
	human experience. Large amounts of spoken materials and audiovisual
	materials in which speech is an important component are becoming
	available. This panel will discuss the uses of these materials for
	education, information retrieval and dissemination, and research,
	the requirements that arise from these uses, and speech recognition
	and retrieval technologies being developed to meet these requirements.
	These materials have tremendous potential for enriching the presentation
	of information in education, newscasts and documentaries, but retrieval
	from and access to these large repositories pose significant challenges.
	The panel will provide an overview of these issues.},
  PDF = {pubs/ASIST2002SpokenArchives.pdf}
}

@ARTICLE{VD_05_Lat_Segment,
  AUTHOR = {V. Doumpiotis and W. Byrne},
  TITLE = {Lattice Segmentation and Minimum {B}ayes Risk Discriminative Training
	for Large Vocabulary Continuous Speech Recognition},
  JOURNAL = {Speech Communication},
  YEAR = {2005},
  VOLUME = {2},
  PAGES = {142--160},
  INTERNAL-NOTE = {NOTE(review): VOLUME = {2} looks like an issue number rather
	than a volume; verify volume/number against the published article},
  ABSTRACT = {Lattice segmentation techniques developed for Minimum Bayes Risk decoding
	in large vocabulary speech recognition tasks are used to compute
	the statistics for discriminative training algorithms that estimate
	HMM parameters so as to reduce the overall risk over the training
	data. New estimation procedures are developed and evaluated for
	small vocabulary and large vocabulary recognition tasks, and additive
	performance improvements are shown relative to maximum mutual information
	estimation. These relative gains are explained through a detailed
	analysis of individual word recognition errors.},
  OWNER = {dkw},
  PDF = {pubs/VD_05_Lat_Segment.pdf},
  TIMESTAMP = {2006.07.15}
}

@INPROCEEDINGS{VD_04_Pinched_Lat,
  AUTHOR = {V. Doumpiotis and W. Byrne},
  TITLE = {Pinched Lattice Minimum {B}ayes-risk Discriminative Training for
	Large Vocabulary Continuous Speech Recognition},
  BOOKTITLE = {Proceedings of the International Conference on Spoken Language Processing},
  YEAR = {2004},
  MONTH = sep,
  ABSTRACT = {Iterative estimation procedures that minimize empirical risk based
	on general loss functions such as the Levenshtein distance have
	been derived as extensions of the Extended Baum Welch algorithm.
	While reducing expected loss on training data is a desirable training
	criterion, these algorithms can be difficult to apply. They are
	unlike MMI estimation in that they require an explicit listing of
	the hypotheses to be considered and in complex problems such lists
	tend to be prohibitively large. To overcome this difficulty, modeling
	techniques originally developed to improve search efficiency in
	Minimum Bayes Risk decoding can be used to transform these estimation
	algorithms so that exact update, risk minimization procedures can
	be used for complex recognition problems. Experimental results in
	two large vocabulary speech recognition tasks show improvements
	over conventionally trained MMIE models.},
  OWNER = {dkw},
  PDF = {pubs/VD_04_Pinched_lat.pdf},
  TIMESTAMP = {2006.07.15}
}

@ARTICLE{VD_05_Discrim_Linear,
  AUTHOR = {V. Doumpiotis and S. Tsakalidis and W. Byrne},
  TITLE = {Discriminative Linear Transforms for Feature Normalization and Speaker
	Adaptation in {HMM} Estimation},
  JOURNAL = {{IEEE} Transactions on Speech and Audio Processing},
  YEAR = {2005},
  VOLUME = {13},
  NUMBER = {3},
  MONTH = may,
  ABSTRACT = {Linear transforms have been used extensively for training and adaptation
	of HMM-based ASR systems. Recently procedures have been developed
	for the estimation of linear transforms under the Maximum Mutual
	Information (MMI) criterion. In this paper we introduce discriminative
	training procedures that employ linear transforms for feature normalization
	and for speaker adaptive training. We integrate these discriminative
	linear transforms into MMI estimation of HMM parameters for improvement
	of large vocabulary conversational speech recognition systems.},
  PDF = {pubs/VD_05_Discrim_linear.pdf}
}

@INPROCEEDINGS{dtsmbr_icassp03,
  AUTHOR = {V. Doumpiotis and S. Tsakalidis and W. Byrne},
  TITLE = {Discriminative Training for Segmental Minimum {B}ayes-Risk Decoding},
  BOOKTITLE = {{IEEE} Conference on Acoustics, Speech and Signal Processing},
  YEAR = {2003},
  ORGANIZATION = {IEEE},
  ABSTRACT = {A modeling approach is presented that incorporates discriminative
	training procedures within segmental Minimum Bayes-Risk decoding
	(SMBR). SMBR is used to segment lattices produced by a general automatic
	speech recognition (ASR) system into sequences of separate decision
	problems involving small sets of confusable words. Acoustic
	models specialized to discriminate between the competing words in
	these classes are then applied in subsequent SMBR rescoring passes.
	Refinement of the search space that allows the use of specialized
	discriminative models is shown to be an improvement over rescoring
	with conventionally trained discriminative models.},
  PDF = {pubs/dmtsmbr.icassp03.pdf}
}

@INPROCEEDINGS{smbdt_eurospeech03,
  AUTHOR = {V. Doumpiotis and S. Tsakalidis and W. Byrne},
  TITLE = {Lattice Segmentation and Minimum {B}ayes Risk Discriminative Training},
  BOOKTITLE = {Proceedings of the European Conference on Speech Communication and
	Technology ({EUROSPEECH})},
  YEAR = {2003},
  ABSTRACT = {Modeling approaches are presented that incorporate discriminative
	training procedures in segmental Minimum Bayes-Risk decoding (SMBR).
	SMBR is used to segment lattices produced by a general automatic
	speech recognition (ASR) system into sequences of separate decision
	problems involving small sets of confusable words. We discuss two
	approaches to incorporating these segmented lattices in discriminative
	training. We investigate the use of acoustic models specialized
	to discriminate between the competing words in these classes which
	are then applied in subsequent SMBR rescoring passes. Refinement
	of the search space that allows the use of specialized discriminative
	models is shown to be an improvement over rescoring with conventionally
	trained discriminative models.},
  PDF = {pubs/eurosp03dtsmbr.pdf}
}

@INPROCEEDINGS{franz03:_infor,
  AUTHOR = {M. Franz and B. Ramabhadran and M. Picheny},
  TITLE = {Information Access in Large Spoken Archives},
  BOOKTITLE = {Proceedings of the ISCA Multilingual Spoken Document Retrieval Workshop},
  YEAR = {2003},
  ADDRESS = {Macau},
  ABSTRACT = {Digital archives have emerged as the pre-eminent method for capturing
	the human experience. Before such archives can be used efficiently,
	their contents must be described. The scale of such archives along
	with the associated cost make it impractical to provide access via
	purely manual means, but automatic technologies for search in spoken
	materials still have relatively limited capabilities. The NSF funded
	MALACH project will use the world's largest digital archive of video
	oral histories, collected by the Survivors of the Shoah Visual History
	Foundation (VHF) to make a quantum leap in the ability to access
	such archives by advancing the state-of-the-art in Automated Speech
	Recognition (ASR), Natural Language Processing (NLP) and related
	technologies. This corpus consists of over 115,000 hours of unconstrained,
	natural speech from 52,000 speakers in 32 different languages, filled
	with disfluencies, heavy accents, age-related coarticulations, and
	un-cued speaker and language switching. This paper discusses some
	of the ASR and NLP tools and technologies that we have been building
	for the English speech in the MALACH corpus. We will also discuss
	this new test bed while emphasizing the unique characteristics of
	this corpus.},
  PDF = {pubs/sdr_0203.pdf}
}

@INPROCEEDINGS{franz03:_autom_tansc_topic_segmen_large_spoken_archiv,
  AUTHOR = {M. Franz and B. Ramabhadran and T. Ward and M. Picheny},
  TITLE = {Automated Transcription and Topic Segmentation of Large Spoken Archives},
  BOOKTITLE = {Proceedings of {EUROSPEECH}},
  YEAR = {2003},
  ADDRESS = {Geneva},
  MONTH = sep,
  ABSTRACT = {Digital archives have emerged as the pre-eminent method for capturing
	the human experience. Before such archives can be used efficiently,
	their contents must be described. The scale of such archives along
	with the associated content mark up cost make it impractical to
	provide access via purely manual means, but automatic technologies
	for search in spoken materials still have relatively limited capabilities.
	The NSF-funded MALACH project will use the world's largest digital
	archive of video oral histories,
	collected by the Survivors of the Shoah Visual History Foundation
	(VHF) to make a quantum leap in the ability to access such archives
	by advancing the state-of-the-art in Automated Speech Recognition
	(ASR), Natural Language Processing (NLP) and related technologies.
	This corpus consists of over 115,000 hours of unconstrained, natural
	speech from 52,000 speakers in 32 different languages, filled with
	disfluencies, heavy accents, age-related coarticulations, and un-cued
	speaker and language switching. This paper discusses some of the
	ASR and NLP tools and technologies that we have been building for
	the English speech in the MALACH corpus. We also discuss this new
	test bed while emphasizing the unique characteristics of this corpus.},
  PDF = {pubs/euro03-ir.pdf}
}

@ARTICLE{VG_04_Seg-Min,
  AUTHOR = {V. Goel and S. Kumar and W. Byrne},
  TITLE = {Segmental Minimum {B}ayes-Risk Decoding for Automatic Speech Recognition},
  JOURNAL = {{IEEE} Transactions on Speech and Audio Processing},
  YEAR = {2004},
  VOLUME = {12},
  PAGES = {234--249},
  MONTH = may,
  PUBLISHER = {IEEE},
  NOTE = {Correction Available : In our recently published paper, we presented
	a risk-based lattice cutting procedure to segment ASR word lattices
	into smaller sub-lattices as a means to improve the efficiency of
	Minimum Bayes-Risk (MBR) rescoring. In the experiments reported,
	some of the hypotheses in the original lattices were inadvertently
	discarded during segmentation, and this affected MBR performance
	adversely. This note gives the corrected results as well as experiments
	demonstrating that the segmentation process does not discard any
	paths from the original lattice. http://mi.eng.cam.ac.uk/~wjb31/ppubs/smbrtsapcorr.pdf},
  ABSTRACT = {Minimum Bayes-Risk (MBR) speech recognizers have been shown to yield
	improvements over the search over word lattices. We present a Segmental
	Minimum Bayes-Risk decoding (SMBR) framework that simplifies the
	implementation of MBR recognizers through the segmentation of the
	N-best lists or lattices over which the recognition is to be performed.
	This paper presents lattice cutting procedures that underlie SMBR
	decoding. Two of these procedures are based on a risk minimization
	criterion while a third one is guided by word-level confidence scores.
	In conjunction with SMBR decoding, these lattice segmentation procedures
	give consistent improvements in recognition word error rate (WER)
	on the Switchboard corpus. We also discuss an application of risk-based
	lattice cutting to multiple-system SMBR decoding and show that it
	is related to other system combination techniques such as ROVER.
	This strategy combines lattices produced from multiple ASR systems
	and is found to give WER improvements in a Switchboard evaluation
	system.},
  OWNER = {dkw},
  PDF = {pubs/VG_04_Seg_Min.pdf},
  TIMESTAMP = {2006.07.15}
}

@ARTICLE{JG_05_Access_Spoken,
  AUTHOR = {J. Goldman and S. Renals and S. Bird and F. de Jong and M. Federico
	and C. Fleischhauer and M. Kornbluh and L. Lamel and D. W. Oard
	and F. Sebastiani and C. Stewart and R. Wright},
  INTERNAL-NOTE = {NOTE(review): surname normalized from "Kornbluth" to "Kornbluh"
	to match the malach_asist_2002 entry in this file - verify against the
	published article},
  TITLE = {Accessing the Spoken Word},
  JOURNAL = {International Journal on Digital Libraries},
  YEAR = {2005},
  VOLUME = {5},
  PAGES = {287--298},
  NUMBER = {4},
  MONTH = aug,
  ABSTRACT = {Spoken word audio collections cover many domains, including radio
	and television broadcasts, oral narratives, governmental proceedings,
	lectures, and telephone conversations. The collection, access and
	preservation of such data is stimulated by political, economic,
	cultural and educational needs. This paper outlines the major issues
	in the field, reviews the current state of technology, examines
	the rapidly changing policy issues relating to privacy and copyright,
	and presents issues relating to the collection and preservation
	of spoken audio content.},
  OWNER = {dkw},
  PDF = {pubs/JG_05_Access_Spoken_Word.pdf},
  TIMESTAMP = {2006.07.17}
}

@INPROCEEDINGS{malach_jcdl_2002,
  AUTHOR = {S. Gustman and D. Soergel and D. W. Oard and W. Byrne and M. Picheny
	and B. Ramabhadran and D. Greenberg},
  TITLE = {Supporting Access to Large Digital Oral History Archives},
  BOOKTITLE = {Proceedings of the Joint Conference on Digital Libraries},
  YEAR = {2002},
  PAGES = {18--27},
  MONTH = jul,
  INTERNAL-NOTE = {NOTE(review): abstract originally read "oral histories 16,000
	hours"; restored as 116,000 hours with a dash, matching the collection
	size cited elsewhere in this file - verify against the published paper},
  ABSTRACT = {This paper describes our experience with the creation, indexing, and
	provision of access to a very large archive of videotaped oral histories
	--- 116,000 hours of digitized interviews in 32 languages from 52,000
	survivors, liberators, rescuers, and witnesses of the Nazi Holocaust.
	It goes on to identify a set of critical research issues that must
	be addressed if we are to provide full and detailed access to collections
	of this size: issues in user requirement studies, automatic speech
	recognition, automatic classification, segmentation, summarization,
	retrieval, and user interfaces. The paper ends by inviting others
	to discuss use of these materials in their own research.},
  PDF = {pubs/JCDL2002MALACH.pdf}
}

@INPROCEEDINGS{XH_06_Evid_Persp,
  AUTHOR = {X. Huang and D. Soergel},
  TITLE = {An Evidence Perspective on Topical Relevance Types and Its Implications
	for Exploratory and Task-Based Retrieval},
  BOOKTITLE = {ISIC},
  YEAR = {2006},
  ADDRESS = {Sydney, Australia},
  MONTH = jul,
  ABSTRACT = {The common view of topical relevance is limited to topic matching,
	resulting in IR systems' failure to detect more complex topical
	connections which are needed to respond to diversified user situations
	and tasks. To reveal the complex evidential relationships involved
	in topical relevance, we analyzed relevance assessments in the domain
	of history that used four types of topical relevance: Direct, indirect,
	context, and comparison. Each of these plays a special role in reasoning,
	making a conclusive argument, or performing a task. Incorporating
	these relevance types into IR systems allows users more flexibility
	and a better focus on their tasks.},
  OWNER = {dkw},
  PDF = {pubs/XH_06_Evid_Persp.pdf},
  TIMESTAMP = {2006.07.31}
}

@inproceedings{XH_05_iConf_poster,
  author    = {X. Huang and D. Soergel},
  title     = {Evidence-Based Interpretation of Topical Relevance Types: {T}owards
               a Richer Understanding of Topical Relevance},
  booktitle = {Poster at Ph.D. Poster Session, i-Conference},
  year      = {2005},
  address   = {Penn State University},
  owner     = {dkw},
  pdf       = {pubs/XH_05_iConf_poster.pdf},
  timestamp = {2006.08.02}
}

@INPROCEEDINGS{XH_04_Rel_Judges,
  AUTHOR = {X. Huang and D. Soergel},
  TITLE = {Relevance Judges' Understanding of Topical Relevance Types: An Explication
	of an Enriched Concept of Topical Relevance},
  BOOKTITLE = {67th Annual Meeting of the American Society for Information Science
	and Technology (ASIS\&T)},
  YEAR = {2004},
  ABSTRACT = {Despite the centrality of topical relevance in information retrieval
	system design and evaluation, understanding and implementation of
	it is usually limited to ``direct overall topical matching'' between
	the subject of the query and the subject of the document. The underlying
	assumption is that only a single type of topical relationship is
	involved. In related work, a relevance judgment instrument was developed
	for the Multilingual Access to Large Spoken ArCHives project (MALACH).
	It incorporates the five topical relevance types (direct relevance,
	indirect/circumstantial relevance, context relevance, comparison
	relevance, and pointer relevance) and was applied by four judges
	to items in the MALACH test collection in Summer 2003. This paper
	reports on the experiences and perceptions of the judges making
	more nuanced judgments about topical relevance. The results suggest
	that more than only one variable/dimension, ``whether it is on topic''
	as usually referred to, contributes to topical relevance, and more
	than a single topical relationship type, ``direct matching'' as generally
	assumed, play an important role in topical relevance.},
  OWNER = {dkw},
  PDF = {pubs/XH_04_Rel_Judges.pdf},
  TIMESTAMP = {2006.07.17}
}

@INPROCEEDINGS{XH_05_Policy_Captur,
  AUTHOR = {X. Huang and R.W. White},
  TITLE = {Policy Capturing Models for Multi-Faceted Relevance Judgments},
  BOOKTITLE = {Proceedings of the 68th ASIS\&T Annual Meeting},
  YEAR = {2005},
  ABSTRACT = {We applied policy capturing and bootstrapping methods to investigate
	the relevance judgment process, with a particular focus on understanding
	how judges summarize an overall relevance judgment from five specific
	aspects of relevance. Our data come from relevance judgments made
	in the development of the MALACH (Multilingual Access to Large Spoken
	ArCHives) Speech Retrieval Test Collection. We developed a linear
	model for each of four relevance judges by regressing his or her
	overall judgments on the five specific relevance aspects. According
	to these models, different judges tended to assign different importance
	weights to different aspects. One of the linear models was applied
	to seven new judgment sets and was highly successful at predicting
	accurate overall judgments for the seven judgment sets.},
  OWNER = {dkw},
  PDF = {pubs/XH_05_Policy_Cap.pdf},
  TIMESTAMP = {2006.07.17}
}

@INPROCEEDINGS{DI_06_Invest_XLang,
  AUTHOR = {D. Inkpen and M. Alzghool and G.J.F. Jones and D.W. Oard},
  TITLE = {Investigating Cross-Language Speech Retrieval for a Spontaneous Conversational
	Speech Collection},
  BOOKTITLE = {Conference on Human Language Technologies and the North American
	Chapter of the Association for Computational Linguistics},
  YEAR = {2006},
  ADDRESS = {New York},
  ABSTRACT = {Cross-language retrieval of spontaneous speech combines the challenges
	of working with noisy automated document transcripts and language
	translation. The CLEF 2005 Cross-Language Speech Retrieval (CL-SR)
	task provides a standard test collection to investigate these challenges.
	In our experimental investigation we show that we can improve retrieval
	performance by careful selection of the term weighting scheme and
	by combining the automatic transcripts with manually-assigned metadata.
	We further show that online machine translation resources can be
	used for topic translation to give effective CL-SR.},
  OWNER = {dkw},
  PDF = {pubs/DI_06_Invest_XLang.pdf},
  TIMESTAMP = {2006.08.01}
}

@INPROCEEDINGS{JK_03_Search_Large_Coll,
  AUTHOR = {J. Kim and D.W. Oard and D. Soergel},
  TITLE = {Searching Large Collections of Recorded Speech: A Preliminary Study},
  BOOKTITLE = {Annual Conference of the American Society for Information Science
	and Technology},
  YEAR = {2003},
  ADDRESS = {Long Beach, CA},
  MONTH = apr,
  INTERNAL-NOTE = {NOTE(review): verify the month; the 2003 ASIST Annual Meeting
	in Long Beach was held in October},
  ABSTRACT = {This paper reports on an exploratory study of the criteria searchers
	use when judging the relevance of recorded speech from radio programs
	and the attributes of a recording on which those judgments are based.
	Five volunteers each performed three searches using two systems
	(NPR Online and SpeechBot) for three questions and judged the relevance
	of the results. Data were collected through observation and screen
	capture, think aloud, and interviews; coded; and analyzed by looking
	for patterns. Criteria used as a basis for selection were found
	to be similar to those observed in relevance studies with printed
	materials, but the attributes used as a basis for assessing those
	criteria were found to exhibit modality-specific characteristics.
	For example, audio replay was often found to be necessary when assessing
	story genre (e.g., report, interview, commentary) because of limitations
	in presently available metadata. Participants reported a strong
	preference for manually prepared summaries over passages extracted
	from automatic speech recognition transcripts, and consequential
	differences in search behavior were observed between the two conditions.
	Some important implications for interface and component design are
	drawn, such as the utility of summaries at multiple levels of detail
	in view of the difficulty of skimming imperfect transcripts and
	the potential utility of automatic speaker identification to support
	authority judgments in systems.},
  OWNER = {dkw},
  PDF = {pubs/JK_03_Search_Large_Coll.pdf},
  TIMESTAMP = {2006.07.15}
}

@INPROCEEDINGS{JK_03_User_Inter,
  AUTHOR = {J. Kim and D. Soergel and D.W. Oard},
  TITLE = {User Interaction in Speech and Video Retrieval: Relevance Judgment
	and Query Reformulation},
  BOOKTITLE = {Presented at the HCIL Annual Symposium and Open House},
  YEAR = {2003},
  ADDRESS = {College Park, MD, USA},
  MONTH = may,
  ABSTRACT = {Speech retrieval systems are now beginning to appear as a means of
	access to spoken collections (news, oral histories, phone messages,
	recordings of meetings, etc.), but we do not yet understand well
	how these systems will be used. The purpose of this study is to
	explore the user behavior in interactive speech retrieval systems
	in the context of oral histories. In particular, this study seeks
	to answer such questions as: what relevance criteria searchers apply
	when they select a recording or a passage, how searchers attempt
	to match their query formulations to their information needs, and
	what metadata or information searchers find valuable both in making
	relevance judgments and in improving their queries. The study uses
	qualitative research methods. Eight participants that include faculty,
	Holocaust scholars, a documentary film producer, and a high school
	teacher searched the Shoah Visual History Foundation's collection
	that consists of 116,000 hours of 52,000 testimonies in 32 different
	languages from the survivors, liberators, rescuers and witnesses
	of the Holocaust. Each participant performed a series of searches
	based on her/his own interests over a period of one to two weeks.
	Data were collected through observation and screen capture, think
	aloud, and semi-structured interviews. Coding is being done and
	analyzed in order to find patterns. System and interface designers
	will benefit from the findings of this study in building future
	interactive speech retrieval systems. For example, the knowledge
	of preferred metadata adopted by searchers may suggest what metadata
	should be catalogued and to what level. The knowledge of how searchers
	reformulate their queries may inform system designers what information
	the system should present and what tools it should provide in order
	to support interactive searching.},
  OWNER = {dkw},
  TIMESTAMP = {2006.08.01}
}

@INPROCEEDINGS{KNL_05_Rel_Crit,
  AUTHOR = {K.N. Lawley and D. Soergel and X. Huang},
  TITLE = {Relevance Criteria Used by Teachers in Selecting Oral History Materials},
  BOOKTITLE = {Proceedings of the Annual Meeting of the American Society for Information
	Science \& Technology ({ASIS\&T})},
  YEAR = {2005},
  ADDRESS = {Charlotte, NC},
  MONTH = oct,
  ABSTRACT = {User-centered perspectives of relevance acknowledge the task-specific
	nature of relevance assessment, but we understand little about the
	retrieval and assessment tasks of teachers, a professional population
	with an important purpose. We observed eight school teachers throughout
	a collaborative process of designing lesson plans and searching
	for appropriate oral history materials and found an array of relevance
	criteria that pertain specifically to teaching. The objectives implied
	in their criteria correspond to the teaching objectives described
	in lesson plans and teacher interviews, including connecting with
	students, representing diversity, and teaching tolerance. Our findings
	suggest user-oriented design approaches that support retrieval of
	instructional materials in line with the needs and knowledge of
	teachers.},
  OWNER = {dkw},
  PDF = {pubs/KNL_05_ASIST.pdf},
  TIMESTAMP = {2006.07.17}
}

@INPROCEEDINGS{KNL_05_iConf_poster,
  AUTHOR = {K.N. Lawley and D. Soergel and R.W. White and X. Huang},
  TITLE = {Teachers' Search for Multimedia Lesson Plan Materials: Study, Results,
	and Design Implications for Oral History Archives},
  BOOKTITLE = {Poster presented at i-Conference 2005: The First Conference of the
	i-School Community},
  YEAR = {2005},
  ADDRESS = {State College, PA},
  MONTH = sep,
  ABSTRACT = {When teachers collect materials to use in their classrooms, they engage
	in a special case of information-seeking that involves task-specific
	relevance criteria and other workflow-related considerations. We
	observed eight middle-school and high-school teachers as they participated
	in a week-long workshop to collaborate on designing modular lesson
	plans for tolerance education. The lesson plans used passages of
	Holocaust survivor testimonies selected from a collection that was
	gathered and cataloged by the Survivors of the Shoah Visual History
	Foundation. The organization of the workshop provided a unique opportunity
	to understand how the selection of oral history materials occurs
	within the context of creating lesson plans.
	
	
	These context-rich data describe some of the motives, preferences,
	and constraints that influence how and why teachers seek and select
	oral history materials for tolerance education. The lessons learned
	from this workshop directly informed the design of a user interface
	that supports the needs and behaviors that we observed among teachers.
	Our conceptual framework draws on literature related to user-centered
	relevance, task-oriented information seeking, instructional design,
	and personalized instruction. Our poster will set forth the relevance
	criteria, strategies, and obstacles we observed during the workshop
	as well as the interface components that support teachers' workflow.},
  OWNER = {dkw},
  PDF = {pubs/KNL_05_iConf_poster.pdf},
  TIMESTAMP = {2006.07.17}
}

@INPROCEEDINGS{BL_06_One-Sided,
  AUTHOR = {B. Liu and D.W. Oard},
  TITLE = {{One-Sided} Measures for Evaluating Ranked Retrieval Effectiveness
	with Spontaneous Conversational Speech},
  BOOKTITLE = {Poster presented at {SIGIR}},
  YEAR = {2006},
  NOTE = {accepted},
  OWNER = {dkw},
  TIMESTAMP = {2006.08.01}
}

@inproceedings{LM_06_Stat_Phrase,
  author    = {L. Mathias and W. Byrne},
  title     = {Statistical Phrase-Based Speech Translation},
  booktitle = {{IEEE} Conference on Acoustics, Speech and Signal Processing},
  year      = {2006},
  abstract  = {A generative statistical model of speech-to-text translation is developed
               as an extension of existing models of phrase-based text translation.
               Speech is translated by mapping ASR word lattices to lattices of
               phrase sequences which are then translated using operations developed
               for text translation. Performance is reported on Chinese to English
               translation of Mandarin Broadcast News.},
  owner     = {dkw},
  pdf       = {pubs/LM_06_Stat_Phrase.pdf},
  timestamp = {2006.07.17}
}

@INPROCEEDINGS{oard04:_trans_acces_spoken_word,
  AUTHOR = {D.W. Oard},
  TITLE = {Transforming Access to the Spoken Word},
  BOOKTITLE = {Proceedings of the International Symposium on Large-Scale Knowledge
	Resources},
  YEAR = {2004},
  MONTH = mar,
  ORGANIZATION = {Tokyo Institute of Technology},
  URL = {http://www.coe21-lkr.titech.ac.jp/english/symposium.html},
  ABSTRACT = {For thousands of years, the written word has held a special place
	in our lives. In part, this results from two key characteristics:
	durability and searchability. Over the past several decades, the
	spoken word has gradually acquired those characteristics. In our
	lifetimes, it seems reasonable to expect that trend to continue,
	and indeed to accelerate, as improvements in automatic speech recognition
	begin to enable large-scale access to spontaneous conversational
	speech. This paper identifies four fundamental challenges that must
	be overcome if we are to leverage this remarkable new capability
	for the greatest benefit, briefly describes one project that is
	exploring this new frontier, and then concludes by looking toward
	future research on this important problem.},
  PDF = {pubs/oard_IntSympLKR_2004.pdf}
}

@MISC{oard_talk_Nov2003,
  AUTHOR = {D.W. Oard},
  TITLE = {Speaking to the Future},
  MONTH = nov,
  YEAR = {2003},
  SLIDES = {pubs/oard_malach_talk_nov03.ppt}
}

@MISC{oard_talk_Oct2003,
  AUTHOR = {D.W. Oard},
  TITLE = {Searching Spoken Word Collections},
  MONTH = oct,
  YEAR = {2003},
  NOTE = {Presented at Columbia University},
  SLIDES = {pubs/malach_columbia_oard.ppt}
}

@inproceedings{malach_cla_tsd02,
  author    = {D.W. Oard and D. Demner-Fushman and J. Hajic and B. Ramabhadran and
               S. Gustman and W. Byrne and D. Soergel and B. Dorr and P. Resnik
               and M. Picheny},
  title     = {Cross-Language Access to Recorded Speech in the {MALACH} Project},
  booktitle = {Proceedings of the Text, Speech, and Dialog Workshop},
  year      = {2002},
  address   = {Brno, Czech Republic},
  abstract  = {The MALACH project seeks to help users find information in a vast
               multilingual collection of untranscribed oral history interviews.
               This paper introduces the goals of the project and focuses on supporting
               access by users who are unfamiliar with the interview language.
               It begins with a review of the state of the art in cross-language
               speech retrieval; approaches that will be investigated are then
               described. Czech was selected as the first non-English language
               to be supported, so results of an initial experiment with Czech/English
               cross-language retrieval are reported.},
  pdf       = {pubs/malach_cla_tsd02.pdf},
  slides    = {pubs/tsd2002e.ppt}
}

@INPROCEEDINGS{malach_aaai_2003,
  AUTHOR = {D.W. Oard and A. Leuski},
  TITLE = {Searching Recorded Speech Based on the Temporal Extent of Topic Labels},
  BOOKTITLE = {Proceedings of AAAI Spring Symposium on Intelligent Multimedia Knowledge
	Management},
  YEAR = {2003},
  MONTH = mar,
  ABSTRACT = {Recorded speech poses unusual challenges for the design of interactive
	end-user search systems. Automatic speech recognition is sufficiently
	accurate to support the automated components of interactive search
	systems in some applications. Recognizing useful recordings among
	those nominated by the system is difficult, however, because listening
	to audio is time consuming and because recognition errors and speech
	disfluencies make it difficult to mitigate this time factor by skimming
	automatic transcripts. Support for the browsing process based on
	supervised learning for automatic classification has shown promise,
	however, and a segment-then-label framework has emerged as the dominant
	paradigm for applying that technique to news broadcasts. This paper
	argues for a more general framework, which we call activation matrices,
	that provides a flexible representation for the mapping between labels
	and time. Three approaches by which activation matrices
	could be generated are briefly described, with the main focus of
	the paper being the use of activation matrices to support search
	and selection in interactive systems.},
  PDF = {pubs/malach_aaai_2003.pdf}
}

@INPROCEEDINGS{oard04:_build_infor_retriev_test_collec,
  AUTHOR = {D.W. Oard and D. Soergel and D. Doermann and X. Huang and G.C. Murray
	and J. Wang and B. Ramabhadran and M. Franz and S. Gustman and J.
	Mayfield and L. Kharevych and S. Strassel},
  TITLE = {Building an Information Retrieval Test Collection for Spontaneous
	Conversational Speech},
  BOOKTITLE = {Proceedings of {SIGIR}'04},
  YEAR = {2004},
  ADDRESS = {Sheffield, U.K.},
  MONTH = jul,
  PUBLISHER = {ACM},
  ABSTRACT = {Test collections model use cases in ways that facilitate evaluation
	of information retrieval systems. This paper describes the use of
	search-guided relevance assessment to create a test collection for
	retrieval of spontaneous conversational speech. Approximately 10,000
	thematically coherent segments were manually identified in 625 hours
	of oral history interviews with 246 individuals. Automatic speech
	recognition results, manually prepared summaries, controlled vocabulary
	indexing, and name authority control are available for every segment.
	Those features were leveraged by a team of four relevance assessors
	to identify topically relevant segments for 28 topics developed
	from actual user requests. Search-guided assessment yielded sufficient
	interannotator agreement to support formative evaluation during
	system development. Baseline results for ranked retrieval are presented
	to illustrate use of the collection.},
  PDF = {pubs/malach_sigir04.pdf}
}

@INPROCEEDINGS{JSO_06_Coupl_Trng_Set,
  AUTHOR = {J.S. Olsson},
  TITLE = {An Analysis of the Coupling between Training Set and Neighborhood
	Sizes for the {kNN} Classifier},
  BOOKTITLE = {{SIGIR}},
  YEAR = {2006},
  ABSTRACT = {We consider the relationship between training set size and the parameter
	k for the k-Nearest Neighbors (kNN) classifier. When few examples
	are available, we observe that accuracy is sensitive to k and that
	best k tends to increase with training size. We explore the subsequent
	risk that k tuned on partitions will be suboptimal after aggregation
	and re-training. This risk is found to be most severe when little
	data is available. For larger training sizes, accuracy becomes increasingly
	stable with respect to k and the risk decreases.},
  KEYWORDS = {text classification, k-Nearest Neighbors, parameter tuning, parameter
	stability},
  OWNER = {dkw},
  PDF = {pubs/JSO_06_Coupl_Trng_Set.pdf},
  TIMESTAMP = {2006.08.01}
}

@INPROCEEDINGS{JSO_05_CL_Text_Class,
  AUTHOR = {J.S. Olsson and D.W. Oard and J. Hajic},
  TITLE = {Cross-Language Text Classification},
  BOOKTITLE = {Proceedings of the 28th Annual International ACM SIGIR Conference
	on Research and Development in Information Retrieval},
  YEAR = {2005},
  PAGES = {645--646},
  ADDRESS = {Salvador, Brazil},
  MONTH = aug,
  PUBLISHER = {ACM Press, New York, NY},
  KEYWORDS = {cross-language text classification and topic classification},
  OWNER = {dkw},
  PDF = {pubs/JSO_05_CL_Text_Class.pdf},
  TIMESTAMP = {2006.07.17}
}

@inproceedings{CP_06_Recog_Emot,
  author    = {C. Pietsch and B. Ramabhadran},
  title     = {A Novel Approach to the Automatic Recognition of Emotions in Natural
               Speech},
  booktitle = {ISEF},
  year      = {2006},
  owner     = {dkw},
  timestamp = {2006.08.01}
}

@INPROCEEDINGS{malach_icassp04,
  AUTHOR = {J. Psutka and J. Hajic and W. Byrne},
  TITLE = {{ASR} for {S}lavic Languages in the {MALACH} Project},
  BOOKTITLE = {{IEEE} Conference on Acoustics, Speech and Signal Processing},
  YEAR = {2004},
  ORGANIZATION = {IEEE},
  NOTE = {{\em Invited Paper in Special Session on Multilingual Speech Processing}},
  ABSTRACT = {The development of acoustic training material for Slavic languages
	within the MALACH project is described. Initial experience with
	the variety of speakers and the difficulties encountered in transcribing
	Czech, Slovak, and Russian language oral history are described along
	with ASR recognition results intended to investigate the effectiveness
	of different transcription conventions that address language specific
	phenomena within the task domain.},
  PDF = {pubs/JP_04_ASR_Slavic_Lang.pdf}
}

@INPROCEEDINGS{tsd03_ruasr,
  AUTHOR = {J. Psutka and I. Iljuchin and P. Ircing and J.V. Psutka and V. Trejbal
	and W. Byrne and J. Hajic and S. Gustman},
  TITLE = {Building {LVCSR} Systems for transcription of spontaneously produced
	{Russian} witnesses in the {MALACH} project: Initial steps and first
	results},
  BOOKTITLE = {Proceedings of the Text, Speech, and Dialog Workshop},
  YEAR = {2003},
  ABSTRACT = {The MALACH project uses the world's largest digital archive of video
	oral histories collected by the Survivors of the Shoah Visual History
	Foundation (VHF) and attempts to access such archives by advancing
	the state-of-the-art in Automatic Speech Recognition and Information
	Retrieval. This paper discusses the initial steps and first results
	in building large vocabulary continuous speech recognition (LVCSR)
	systems for the transcription of Russian witnesses. As the third
	language processed in the MALACH project (following English and
	Czech), Russian has posed new ASR challenges, especially in phonetic
	modeling. Although most of the Russian testimonies were provided
	by native Russian survivors, the speakers come from many different
	regions and countries resulting in a diverse collection of accented
	spontaneous Russian speech.},
  PDF = {pubs/Russian_MALACH_tsd03.pdf}
}

@INPROCEEDINGS{psutka04:_issues_annot_czech_spont_speec,
  AUTHOR = {J. Psutka and P. Ircing and J. Hajic and V. Radova and J.V. Psutka
	and W. Byrne and S. Gustman},
  TITLE = {Issues in Annotation of the {Czech} Spontaneous Speech Corpus in
	the {MALACH} Project},
  BOOKTITLE = {Proceedings of the International Conference on Language Resources
	and Evaluation, LREC},
  YEAR = {2004},
  ABSTRACT = {The paper presents the issues encountered in processing spontaneous
	Czech speech in the MALACH project. Specific problems connected
	with a frequent occurrence of colloquial words in spontaneous Czech
	are analyzed; a partial solution is proposed and experimentally
	evaluated.},
  PDF = {pubs/JP_04_Annot_Czech.pdf}
}

@INPROCEEDINGS{tsd03_czasr,
  AUTHOR = {J. Psutka and P. Ircing and J.V. Psutka and V. Radova and W. Byrne
	and J. Hajic and S. Gustman},
  TITLE = {Towards automatic transcription of spontaneous {Czech} speech in
	the {MALACH} project},
  BOOKTITLE = {Proceedings of the Text, Speech, and Dialog Workshop},
  YEAR = {2003},
  ABSTRACT = {Our paper discusses the progress achieved during a one-year effort
	in building the Czech LVCSR system for the automatic transcription
	of spontaneously produced testimonies in the MALACH project. The
	difficulty of this task stems from the highly inflectional nature
	of the Czech language and is further multiplied by the presence
	of many colloquial words in spontaneous Czech speech as well as
	by the need to handle emotional speech filled with disfluencies,
	heavy accents, age-related coarticulation and language switching.
	In this paper we concentrate mainly on the acoustic modeling issues
	- the proper choice of front-end parameterization, the handling of
	non-speech events in acoustic modeling, and unsupervised acoustic
	adaptation via MLLR. A method for selecting suitable language modeling
	data is also briefly discussed.},
  PDF = {pubs/Czech_Malach_tsd03_1.pdf}
}

@INPROCEEDINGS{czasr_tsd02,
  AUTHOR = {J. Psutka and P. Ircing and J.V. Psutka and V. Radova and W. Byrne
	and J. Hajic and S. Gustman and B. Ramabhadran},
  TITLE = {Automatic Transcription of {Czech} Language Oral History in the {MALACH}
	Project: Resources and Initial Experiments},
  BOOKTITLE = {Proceedings of the Text, Speech, and Dialog Workshop},
  YEAR = {2002},
  ABSTRACT = {In this paper we describe the initial stages of the ASR component
	of the MALACH project. This project will attempt to provide improved
	access to the large multilingual spoken archives collected by the
	Survivors of the Shoah Visual History Foundation by advancing the
	state of the art in automated speech recognition. In order to train
	the ASR system, it is necessary to manually transcribe a large amount
	of speech data, identify the appropriate vocabulary, and obtain
	relevant text for language modeling. We give a detailed description
	of the speech annotation process; show the specific properties of
	the spontaneous speech contained in the archives; and present baseline
	speech recognition results.},
  PDF = {pubs/tsd02.psutka.pdf}
}

@INPROCEEDINGS{malachczasr_eurospeech03,
  AUTHOR = {J. Psutka and P. Ircing and J.V. Psutka and V. Radova and W. Byrne
	and J. Hajic and J. Mirovsky and S. Gustman},
  TITLE = {Large Vocabulary {ASR} for Spontaneous {Czech} in the {MALACH} Project},
  BOOKTITLE = {Proc. of the European Conference on Speech Communication and Technology
	(EUROSPEECH)},
  YEAR = {2003},
  ABSTRACT = {This paper describes LVCSR research into the automatic transcription
	of spontaneous Czech speech in the MALACH (Multilingual Access to
	Large Spoken Archives) project. This project attempts to provide
	improved access to the large multilingual spoken archives collected
	by the Survivors of the Shoah Visual History Foundation (VHF) (www.vhf.org)
	by advancing the state of the art in automated speech recognition.
	We describe a baseline ASR system and discuss the problems in language
	modeling that arise from the nature of Czech as a highly inflectional
	language that also exhibits diglossia between its written and spontaneous
	forms. The difficulties of this task are compounded by heavily accented,
	emotional and disfluent speech along with frequent switching between
	languages. To overcome the limited amount of relevant language model
	data we use statistical techniques for selecting an appropriate
	training corpus from a large unstructured text collection resulting
	in significant reductions in word error rate.},
  PDF = {pubs/malachczeurosp03.pdf}
}

@inproceedings{BR_05_Exploit,
  author    = {B. Ramabhadran},
  title     = {Exploiting Large Quantities of Spontaneous Speech for Unsupervised
               Training of Acoustic Models},
  booktitle = {Proceedings of {INTERSPEECH 2005}, Eurospeech},
  year      = {2005},
  owner     = {dkw},
  timestamp = {2006.08.03}
}

@misc{BR_06_Transc_Next_Top_Model,
  author       = {B. Ramabhadran},
  title        = {{[PANEL]} Transcription's Next Top Model: Life Beyond Broadcast News
                  and Telephony Conversations},
  howpublished = {{IEEE-ASRU}},
  year         = {2005},
  owner        = {dkw},
  timestamp    = {2006.08.01}
}

@INPROCEEDINGS{ramabhadran03:_impac_audio_segmen_segmen_clust,
  AUTHOR = {B. Ramabhadran and J. Huang and U. Chaudhari and G. Iyengar and H.J.
	Nock},
  TITLE = {Impact of Audio Segmentation and Segment Clustering on Automated
	Transcription Accuracy of Large Spoken Archives},
  BOOKTITLE = {Proceedings of {EUROSPEECH}},
  YEAR = {2003},
  ADDRESS = {Geneva},
  MONTH = sep,
  ABSTRACT = {This paper addresses the influence of audio segmentation and segment
	clustering on automatic transcription accuracy for large spoken
	archives. The work forms part of the ongoing MALACH project, which
	is developing advanced techniques for supporting access to the world
	of video oral histories collected in many languages from over 52,000
	survivors and witnesses of the Holocaust. We present several audio-only
	and audio-visual segmentation schemes, including two novel schemes:
	the first is iterative and audio-only, the second uses audio-visual
	synchrony. Unlike most previous work, we evaluate these schemes
	in terms of their impact upon recognition accuracy. Results on English
	interviews show the automatic segmentation schemes give performance
	comparable to (exorbitantly expensive and impractically lengthy)
	manual segmentation when using a single pass decoding strategy based
	on speaker-independent models. However, when using a multiple pass
	decoding strategy with adaptation, results are sensitive to both
	initial audio segmentation and the scheme for clustering segments
	prior to adaptation: the combination of our best automatic segmentation
	and clustering scheme has an error rate 8% worse (relative) to manual
	audio segmentation and clustering due to the occurrence of ``speaker-impure''
	segments.},
  PDF = {pubs/euro03-seg.pdf}
}

@INPROCEEDINGS{ramabhadran_icassp03,
  AUTHOR = {B. Ramabhadran and J. Huang and M. Picheny},
  TITLE = {Towards Automatic Transcription of Large Spoken Archives - {E}nglish
	{ASR} for the {MALACH} project},
  BOOKTITLE = {International Conference on Acoustics, Speech, and Signal Processing},
  YEAR = {2003},
  ABSTRACT = {Digital archives have emerged as the pre-eminent method for capturing
	the human experience. Before such archives can be used efficiently,
	their content must be described. The NSF-funded MALACH project aims
	to provide improved access to large spoken archives by advancing
	the state-of-the-art in automatic speech recognition, information
	retrieval, and related technologies for multiple languages. This
	paper describes the ASR research for the English speech in the MALACH
	corpus. The MALACH corpus consists of unconstrained, natural speech
	filled with disfluencies, heavy accents, age-related coarticulation,
	uncued speaker and language switching, and emotional speech collected
	in the form of interviews from over 52,000 speakers in 32 languages.
	In this paper, we describe this new testbed for developing speech
	recognition algorithms and report on the performance of well-known
	techniques for building better acoustic models for the speaking
	styles seen in this corpus. The best English ASR system to date
	has a word error rate of 43.8\% on this corpus.},
  PDF = {pubs/icassp03-g001a.pdf}
}

@INPROCEEDINGS{BR_04_Use_Metadata,
  AUTHOR = {B. Ramabhadran and O. Siohan and G. Zweig},
  TITLE = {Use of Metadata to Improve Recognition of Spontaneous Speech and
	Named Entities},
  BOOKTITLE = {International Conference on Spoken Language Processing},
  YEAR = {2004},
  OWNER = {dkw},
  TIMESTAMP = {2006.07.17}
}

@TECHREPORT{HGR_05_Errors_ASR,
  AUTHOR = {H. Gladfelter Rubin and D. Soergel},
  TITLE = {A Typology of Errors in {ASR} Transcriptions of Oral History Interviews},
  INSTITUTION = {University of Maryland, College of Information Studies},
  YEAR = {2005},
  TYPE = {{MALACH} Technical Report},
  ADDRESS = {College Park},
  MONTH = aug,
  OWNER = {dkw},
  PDF = {pubs/HGR_05_Errors_ASR.pdf},
  TIMESTAMP = {2006.08.02}
}

@inproceedings{AS_04_Meas_Conv,
  author    = {A. Sethy and S. Narayanan and B. Ramabhadran},
  title     = {Measuring Convergence in Language Model Estimation Using Relative
               Entropy},
  booktitle = {International Conference on Spoken Language Processing},
  year      = {2004},
  abstract  = {Language models are generally estimated using smoothed counting techniques.
               These counting schemes can be viewed as non linear functions operating
               on a Bernoulli process which converge asymptotically to the true
               density. The rate at which these counting schemes converge to the
               true density is constrained by the training data set available and
               the nature of the language model (LM) being estimated. In this paper
               we look at language model estimates as random variables and present
               an efficient relative entropy (R.E) based approach to study their
               convergence with increasing training data size. We present experimental
               results for language modeling in a generic LVCSR system and a medical
               domain dialogue task. We also present an efficient recursive R.E
               computation method which can be used as a LM distance measure for
               a number of tasks including LM clustering.},
  owner     = {dkw},
  pdf       = {pubs/AS_04_Meas_Conv.pdf},
  timestamp = {2006.07.17}
}

@INPROCEEDINGS{sethy_asru03,
  AUTHOR = {A. Sethy and B. Ramabhadran and S. Narayanan},
  TITLE = {Improvements in {ASR} for the {MALACH} Project Using Syllable-Centric
	Models},
  BOOKTITLE = {Proceedings of the {IEEE} Automatic Speech Recognition and Understanding
	Workshop},
  YEAR = {2003},
  ADDRESS = {St. Thomas},
  MONTH = dec,
  ABSTRACT = {LVCSR systems have traditionally used phones as the basic acoustic
	unit for recognition. Syllable and other longer length units provide
	an efficient means for modeling long-term temporal dependencies
	in speech that are difficult to capture in a phone based recognition
	framework. However, it is well known that longer duration units
	suffer from training data sparsity problems since a large number
	of units in the lexicon will have little or no acoustic training
	data. Previous research has shown that syllable-based modeling provides
	improvements over word internal systems, but performance has lagged
	behind crossword context-dependent systems. In this paper, we describe
	a syllable-centric approach to English LVCSR for the MALACH (Multilingual
	Access to Large spoken ArCHives) project. The combined modeling
	of syllables and context-dependent phones provides a 0.5% absolute
	improvement in recognition accuracy over the state-of-the-art cross
	word system for the heavily accented and spontaneous speech seen
	in oral history archives. More importantly, we report on the importance
	of the improved recognition of names and concepts that is crucial
	for subsequent search and retrieval.},
  PDF = {pubs/asru03-syl.pdf}
}

@INPROCEEDINGS{IS_04_Task_Specific,
  AUTHOR = {I. Shafran and W. Byrne},
  TITLE = {Task-Specific Minimum {Bayes}-risk Decoding Using Learned Edit Distance},
  BOOKTITLE = {Proc. of the International Conference on Spoken Language Processing},
  YEAR = {2004},
  ABSTRACT = {This paper extends the minimum Bayes-risk framework to incorporate
	a loss function specific to the task and the ASR system. The errors
	are modeled as a noisy channel and the parameters are learned from
	the data. The resulting loss function is used in the risk criterion
	for decoding. Experiments on a large vocabulary conversational speech
	recognition system demonstrate significant gains of about 1% absolute
	over MAP hypothesis and about 0.6% absolute over untrained loss function.
	The approach is general enough to be applicable to other sequence
	recognition problems such as in Optical Character Recognition (OCR)
	and in analysis of biological sequences.},
  OWNER = {dkw},
  PDF = {pubs/IS_04_Task-spec_Min_Bayes.pdf},
  TIMESTAMP = {2006.07.15}
}

@INPROCEEDINGS{IS_06_Corr_Models,
  AUTHOR = {I. Shafran and K. Hall},
  TITLE = {Corrective Models for Speech Recognition of Inflected Languages},
  BOOKTITLE = {Proc. of the Conference on Empirical Methods in Natural Language
	Processing (EMNLP)},
  YEAR = {2006},
  ADDRESS = {Sydney, Australia},
  MONTH = jul,
  ABSTRACT = {This paper presents a corrective model for speech recognition of inflected
	languages. The model, based on a discriminative framework, incorporates
	word n-gram features as well as factored morphological features,
	providing error reduction over the model based solely on word n-gram
	features. Experiments on a large vocabulary task, namely the Czech
	portion of the MALACH corpus, demonstrate performance gain of about
	1.1--1.5\% absolute in word error rate, wherein morphological features
	contribute about a third of the improvement. A simple feature selection
	mechanism based on {$\chi^2$} statistics is shown to be effective in reducing
	the number of features by about 70\% without any loss in performance,
	making it feasible to explore yet larger feature spaces.},
  OWNER = {dkw},
  PDF = {pubs/IS_06_Corr_Models.pdf},
  TIMESTAMP = {2006.07.15}
}

@inproceedings{OS_05_Fast_Vocab,
  author    = {O. Siohan and M. Bacchiani},
  title     = {Fast Vocabulary-Independent Audio Search Using Path-Based Graph Indexing},
  booktitle = {{INTERSPEECH} 2005, Eurospeech},
  year      = {2005},
  owner     = {dkw},
  timestamp = {2006.07.17}
}

@INPROCEEDINGS{OS_05_Constuct_ensembles,
  AUTHOR = {O. Siohan and B. Ramabhadran and B. Kingsbury},
  TITLE = {Constructing Ensembles of {ASR} Systems Using Randomized Decision
	Trees},
  BOOKTITLE = {Proc. of the International Conference on Acoustics, Speech, and Signal
	Processing},
  YEAR = {2005},
  ABSTRACT = {Building multiple automatic speech recognition (ASR) systems and combining
	their outputs using voting techniques such as ROVER is an effective
	technique for lowering the overall word error rate. A successful
	system combination approach requires the construction of multiple
	systems with complementary errors, or the combination will not outperform
	any of the individual systems. In general, this is achieved empirically,
	for example by building systems on different input features. In
	this paper, we present a systematic approach for building multiple
	ASR systems in which the decision tree state-tying procedure that
	is used to specify context-dependent acoustic models is randomized.
	Experiments carried out on two large vocabulary recognition tasks,
	MALACH and DARPA EARS, illustrate the effectiveness of the approach.},
  OWNER = {dkw},
  PDF = {pubs/OS_05_Constuct_Ensembles.pdf},
  TIMESTAMP = {2006.07.17}
}

@inproceedings{OS_04_Speech_Recog,
  author    = {O. Siohan and B. Ramabhadran and G. Zweig},
  title     = {Speech Recognition Error Analysis on the {E}nglish {MALACH} Corpus},
  booktitle = {International Conference on Spoken Language Processing},
  year      = {2004},
  owner     = {dkw},
  timestamp = {2006.07.17}
}

@MISC{DS_06_Das_Malach_Projekt,
  AUTHOR = {D. Soergel},
  TITLE = {Das {MALACH} Projekt. Zugang zu mündlich überlieferter Geschichte durch
	verbesserte Methoden im Sprachretrieval. Spracherkennung, Sprachverarbeitung,
	Retrievalmethoden, Benutzerschnittstelle},
  HOWPUBLISHED = {University of Saarbruecken},
  NOTE = {Presentation},
  MONTH = feb,
  YEAR = {2005},
  OWNER = {dkw},
  TIMESTAMP = {2006.08.01}
}

@TECHREPORT{malach_cistr_2002,
  AUTHOR = {D. Soergel and D.W. Oard and S. Gustman and L. Fraser and J. Kim
	and J. Meyer and E. Proffen and T. Sartori},
  TITLE = {The Many Uses of Digitized Oral History Collections: Implications
	for Design},
  INSTITUTION = {College of Information Studies, University of Maryland},
  YEAR = {2002},
  ABSTRACT = {Oral history - and spoken word collections generally - are assuming
	increasing importance in digital libraries as the storage, transmission
	and reproduction infrastructure improves. This paper describes three
	synergistic approaches to user needs analysis, explains how they
	are being applied to guide the design of systems to provide access
	to oral history collections (using as a test bed the Shoah Foundation's
	collection of over 50,000 videotaped oral history interviews), presents
	preliminary results from so-called ``discount requirements analysis''
	of a wide variety of users and uses of oral history data and a concomitant
	variety of access points that would be useful.},
  PDF = {pubs/MALACHTechReportUses.pdf}
}

@inproceedings{ST_05_Acoustic_Trng,
  author    = {S. Tsakalidis and W. Byrne},
  title     = {Acoustic Training from Heterogeneous Data Sources: Experiments in
               {M}andarin Conversational Telephone Speech Transcription},
  booktitle = {{IEEE} Conference on Acoustics, Speech and Signal Processing},
  year      = {2005},
  owner     = {dkw},
  pdf       = {pubs/ST_05_Acoust_Trng.pdf},
  timestamp = {2006.07.15}
}

@inproceedings{VV_05_Lat_Seg,
  author    = {V. Venkataramani and W. Byrne},
  title     = {Lattice Segmentation and Support Vector Machines for Large Vocabulary
               Continuous Speech Recognition},
  booktitle = {{IEEE} Conference on Acoustics, Speech and Signal Processing},
  year      = {2005},
  address   = {Philadelphia},
  abstract  = {Lattice segmentation procedures are used to spot possible recognition
               errors in first-pass recognition hypotheses produced by a large
               vocabulary continuous speech recognition system. This approach is
               analyzed in terms of its ability to reliably identify, and provide
               good alternatives for, incorrectly hypothesized words. A procedure
               is described to train and apply Support Vector Machines to strengthen
               the first pass system where it was found to be weak, resulting in
               small but statistically significant recognition improvements on
               a large test set of conversational speech.},
  owner     = {dkw},
  pdf       = {pubs/VV_05_Lat_Seg.pdf},
  timestamp = {2006.07.15}
}

@article{VV_06_Gini_support,
  author    = {V. Venkataramani and S. Chakrabartty and W. Byrne},
  title     = {Gini Support Vector Machines for Segmental Minimum {B}ayes Risk Decoding
               of Continuous Speech},
  journal   = {Computer Speech and Language},
  year      = {2006},
  note      = {Accepted; in revision},
  abstract  = {We describe the use of Support Vector Machines (SVMs) for continuous
               speech recognition by incorporating them in Segmental Minimum Bayes
               Risk decoding. Lattice cutting is used to convert the Automatic
               Speech Recognition search space into sequences of smaller recognition
               problems. SVMs are then trained as discriminative models over each
               of these problems and used in a rescoring framework. We pose the
               estimation of a posterior distribution over hypothesis in these
               regions of acoustic confusion as a logistic regression problem.
               We also show that GiniSVMs can be used as an approximation technique
               to estimate the parameters of the logistic regression problem. On
               a small vocabulary recognition task we show that the use of GiniSVMs
               can improve the performance of a well trained Hidden Markov Model
               system trained under the Maximum Mutual Information criterion. We
               also find that it is possible to derive reliable confidence scores
               over the GiniSVM hypotheses and that these can be used to good effect
               in hypothesis combination. We discuss the problems that we expect
               to encounter in extending this approach to Large Vocabulary Continuous
               Speech Recognition and describe initial investigation of constrained
               estimation techniques to derive feature spaces for SVMs.},
  owner     = {dkw},
  pdf       = {pubs/VV_06_Gini_Support.pdf},
  timestamp = {2006.07.15}
}

@inproceedings{asru03_svmsmbr,
  author    = {V. Venkataramani and S. Chakrabartty and W. Byrne},
  title     = {Support Vector Machines for Segmental Minimum {B}ayes Risk Decoding
               of Continuous Speech},
  booktitle = {{IEEE} Automatic Speech Recognition and Understanding Workshop},
  year      = {2003},
  abstract  = {Segmental Minimum Bayes Risk (SMBR) Decoding involves the refinement
               of the search space into manageable confusion sets, {\it i.e.,}
               smaller sets of confusable words. We describe the application of
               Support Vector Machines (SVMs) as discriminative models for the
               refined search space. We show that SVMs, which in their basic formulation
               are binary classifiers of fixed dimensional observations, can be
               used for continuous speech recognition. We also study the use of
               $Gini$SVMs, which is a variant of the basic SVM. On a small vocabulary
               task, we show this two pass scheme outperforms MMI trained HMMs.
               Using system combination we also obtain further improvements over
               discriminatively trained HMMs.},
  pdf       = {pubs/asru03_smbr_svm.pdf}
}

@INPROCEEDINGS{JW_05_CLEF_2005,
  AUTHOR = {J. Wang and D.W. Oard},
  TITLE = {{CLEF} 2005 {CL-SR} at Maryland: Document and Query Expansion Using
	Side Collections and Thesauri},
  BOOKTITLE = {Working Notes for the {CLEF-2005} Workshop},
  YEAR = {2005},
  ADDRESS = {Vienna, Austria},
  ABSTRACT = {This paper reports results for the University of Maryland's participation
	in CLEF-2005 Cross-Language Speech Retrieval track. Techniques that
	were tried include: (1) document expansion with manually created
	metadata (thesaurus keywords and segment summaries) from a large
	side collection, (2) query refinement with pseudo-relevance feedback,
	(3) keyword expansion with thesaurus synonyms, and (4) cross-language
	speech retrieval using translation knowledge obtained from the statistics
	of a large parallel corpus. The results show that document expansion
	and query expansion using blind relevance feedback were effective,
	although optimal parameter choices differed somewhat between the
	training and evaluation sets. Document expansion in which manually
	assigned keywords were augmented with thesaurus synonyms yielded
	marginal gains on the training set, but no improvement on the evaluation
	set. Cross-language retrieval with French queries yielded 79\% of
	monolingual mean average precision when searching manually assigned
	metadata despite a substantial domain mis-match between the parallel
	corpus and the retrieval task. Detailed failure analysis indicates
	that speech recognition errors for named entities were an important
	factor that substantially degraded retrieval effectiveness.},
  KEYWORDS = {Speech Retrieval, Document Expansion, Query Expansion, Blind Relevance
	Feedback},
  OWNER = {dkw},
  PDF = {pubs/JW_05_Query_Expan.pdf},
  TIMESTAMP = {2006.08.01}
}

@INPROCEEDINGS{RW_05_chrono_metadata,
  AUTHOR = {R.W. White},
  TITLE = {Chronological Metadata},
  BOOKTITLE = {Proceedings of the 28th Annual International {ACM} {SIGIR} Conference
	on Research and Development in Information Retrieval},
  YEAR = {2005},
  OWNER = {dkw},
  TIMESTAMP = {2006.07.17}
}

@INPROCEEDINGS{RW_05_CLEF_overview,
  AUTHOR = {R.W. White and D.W. Oard and G.J.F. Jones and D. Soergel and X. Huang},
  TITLE = {Overview of the {CLEF}-2005 Cross-Language Speech Retrieval Track},
  BOOKTITLE = {Cross-Language Evaluation Forum},
  YEAR = {2005},
  ADDRESS = {Vienna, Austria},
  MONTH = sep,
  ABSTRACT = {The task for the CLEF-2005 cross-language speech retrieval track was
	to identify topically coherent segments of English interviews in
	a known-boundary condition. Seven teams participated, performing
	both monolingual and cross-language searches of ASR transcripts,
	automatically generated metadata, and manually generated metadata.
	Results indicate that monolingual search technology is sufficiently
	accurate to be useful for some purposes (the best mean average precision
	was 0.18) and cross-language searching yielded results typical of
	those seen in other applications (with the best systems approximating
	monolingual mean average precision).},
  OWNER = {dkw},
  PDF = {pubs/RW_05_CLEF-Overview.pdf},
  TIMESTAMP = {2006.07.17}
}

@INPROCEEDINGS{RW_06_ConceptMaps,
  AUTHOR = {R.W. White and H. Song and J. Liu},
  TITLE = {Concept Maps to Support Oral History Search and Use},
  BOOKTITLE = {{JCDL} '06: Proceedings of the 6th {ACM/IEEE-CS} Joint Conference on
	Digital Libraries},
  YEAR = {2006},
  PAGES = {192--193},
  ADDRESS = {Chapel Hill, NC, USA},
  PUBLISHER = {ACM Press, New York, NY, USA},
  ABSTRACT = {In this paper we describe a novel technique to support information
	seeking in oral history archives using concept maps. We conducted
	a pilot study with teachers engaged in work tasks using a prototype
	concept mapping tool. Results suggest that concept maps can help
	searchers, especially when tasks are complex.},
  DOI = {http://doi.acm.org/10.1145/1141753.1141794},
  OWNER = {dkw},
  PDF = {pubs/RW_06_ConceptMaps.pdf},
  TIMESTAMP = {2006.08.01}
}

@TECHREPORT{PZ_06_Knowl-Based,
  AUTHOR = {P. Zhang and D. Soergel},
  TITLE = {Knowledge-Based Approaches to the Segmentation of Oral History Interviews},
  INSTITUTION = {University of Maryland, College of Information Studies},
  YEAR = {2006},
  TYPE = {{MALACH} Technical Report},
  ADDRESS = {College Park},
  MONTH = may,
  ABSTRACT = {This paper applies discourse knowledge to the segmentation of speech
	transcripts. The paper reviews literature on discourse structure,
	as well as approaches used in text segmentation and speech segmentation,
	identifies what features are used and how the features are combined
	in these approaches. After reviewing the literature, a three-part
	study is conducted to answer the following three research questions:
	
	Are discourse-markers indicators of segment boundaries in oral history
	interviews?
	
	Are questions good indicators of segment boundaries? Could questions
	be used as segment boundary or segment continuation indicators?
	
	Do the discourse structures proposed by Labov and Waletzky (1967,
	1997) and Stein and Glenn (1979) hold for oral history interviews?
	How could this knowledge be used in automatic segmentation?
	
	Methodology, results and analysis of each part of the study are described.
	Major findings include trends in segmentation and answers to these
	questions. Limitation of the study is discussed. The paper also
	suggests future research topic relates to segmentation and discourse
	analysis.},
  OWNER = {dkw},
  PDF = {pubs/PZ_06_Knowl-Based.pdf},
  TIMESTAMP = {2006.08.02}
}

@TECHREPORT{IS_06_Acoustic,
  AUTHOR = {I. Shafran},
  TITLE = {Acoustic and Language Modeling for {Czech} {ASR} in {MALACH}},
  INSTITUTION = {The Johns Hopkins University, The Center for Language and Speech Processing},
  YEAR = {2006},
  TYPE = {{CLSP} Research Note},
  ADDRESS = {Baltimore, MD},
  MONTH = aug,
  OWNER = {katyn},
  PDF = {pubs/IS_06_Acoustic.pdf},
  TIMESTAMP = {2006.09.20}
}


This file has been generated by bibtex2html 1.77