%%% testing.bib -- Testing bibliography (selective)
@unpublished{EWD:EWD303,
author = "Edsger W. Dijkstra",
title = "EWD303: On the reliability of programs",
year = "n.d.",
url = "http://www.cs.utexas.edu/users/EWD/ewd03xx/EWD303.PDF",
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Impact and cost of poor quality software and testing
%%%
@TechReport{NIST02-3,
author = "{Research Triangle Institute}",
title = "The Economic Impacts of Inadequate Infrastructure for Software Testing",
institution = "National Institute of Standards and Technology",
year = "2002",
OPTkey = "",
type = "NIST Planning Report",
number = "02-3",
OPTaddress = "",
month = may,
OPTnote = "",
OPTannote = ""
}
@InProceedings{Hartman2002,
author = "A. Hartman",
title = "Is {ISSTA} research relevant to industry?",
crossref = "ISSTA2002",
pages = "205--206",
}
@InProceedings{HooimeijerW2007,
author = "Hooimeijer, Pieter and Weimer, Westley",
title = "Modeling bug report quality",
crossref = "ASE2007",
pages = "34-43",
}
@Article{AlyA1988,
author = "Nael A. E. Aly and Adel A. Aly",
title = "Measures of testability for automatic diagnostic systems",
journal = IEEETR,
year = 1988,
volume = 37,
number = 5,
pages = "531-538",
month = dec,
}
@Book{Beizer90,
author = {Beizer, Boris},
title = {Software Testing Techniques},
publisher = {International Thomson Computer Press},
address = {Boston},
year = {1990},
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Empirical evaluation of unit testing & TDD (test-driven development)
%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Software reliability and fault injection models
%%%
@Article{goel85a,
key = "goel85a",
author = "Amrit L. Goel",
title = "Software Reliability Models: Assumptions, Limitations, and
Applicability",
journal = TSE,
month = dec,
year = "1985",
volume = "SE-11",
number = "12",
pages = "1411--1423"
}
@InProceedings{1989:icse:ohba,
author = "Mitsuru Ohba and Xiao-Mei Chou",
title = "Does Imperfect Debugging Affect Software Reliability
Growth?",
crossref = "ICSE89",
pages = "237--244",
ISSN = "0270-5257",
ISBN = "0-89791-258-6 (IEEE), 0-8186-1941-4 (ACM)",
genterms = "DESIGN, LANGUAGES, MEASUREMENT, RELIABILITY",
categories = "D.2.5 Software, SOFTWARE ENGINEERING, Testing and
Debugging. D.2.8 Software, SOFTWARE ENGINEERING,
Metrics, Performance measures.",
abstract = "This paper discusses the improvement of conventional
software reliability growth models by elimination of
the unreasonable assumption that errors or faults in a
program can be perfectly removed when they are
detected. The results show that exponential-type
software reliability growth models that deal with
error-counting data could be used even if the perfect
debugging assumption were not held, in which case the
interpretation of the model parameters should be
changed. An analysis of real project data is
presented.",
annote = "incomplete",
}
@Article{EickGKMM2001,
author = "Stephen G. Eick and Todd L. Graves and Alan F. Karr and J. S. Marron and Audris Mockus",
title = "Does code decay? Assessing the evidence from change
management data",
journal = TSE,
year = 2001,
volume = 27,
number = 1,
pages = "1--12",
month = jan,
abstract =
"A central feature of the evolution of large software systems is that change
--- which is necessary to add new functionality, accommodate new hardware
and repair faults --- becomes increasingly difficult over time. In this
paper we approach this phenomenon, which we term code decay, scientifically
and statistically. We define code decay, and propose a number of
measurements (code decay indices) on software, and on the organizations
that produce it, that serve as symptoms, risk factors and predictors of
decay. Using an unusually rich data set (the fifteen-plus year change
history of the millions of lines of software for a telephone switching
system), we find mixed but on the whole persuasive statistical evidence of
code decay, which is corroborated by developers of the code. Suggestive,
but not yet fully assimilated, indications that perfective maintenance can
retard code decay are also discussed.",
}
@Article{GravesKMS2000,
author = "Todd L. Graves and Alan F. Karr and J. S. Marron and Harvey Siy",
title = "Predicting fault incidence using software change history",
journal = TSE,
year = 2000,
volume = 26,
number = 7,
pages = "653--661",
month = jul,
abstract =
"This paper is an attempt to understand the processes by which software
ages. We define code to be aged or decayed if its structure makes it too
difficult to understand or change, and we measure the extent of decay by
counting the number of faults in code in a period of time. Using change
management data from a very large, long-lived software system, we explore
the extent to which measurements from the change history are successful in
predicting the distribution over modules of these incidences of faults. In
general, process measures based on the change history are more useful in
predicting fault rates than product metrics of the code: for instance, the
number of times code has been changed is a better indication of how many
faults it will contain than is its length. We also compare the fault rates
of code of various ages, finding that if a module is on the average a year
older than an otherwise similar module, the older module will have roughly
a third fewer faults. Our most successful model measures the fault
potential of a module as a sum of contributions from all of the times the
module has been changed, with large, recent changes receiving the most
weight."
}
@Article{ChristensonH96,
author = "Dennis A. Christenson and Steel T. Huang",
title = "Estimating the fault content of software using the fix-on-fix model",
journal = "Bell Labs Technical Journal",
year = 1996,
volume = 1,
number = 1,
pages = "130--137",
month = "Summer",
abstract =
"In statistical theory, the percentage of defects in a randomly
drawn sample is an estimate of the percentage of defects in the
entire population. When this concept is applied to the process of
fixing faults during software development, a new fix-on-fix model
results. Such a model can predict the number of software faults,
thus providing a useful quality assessment. The model discussed in
this paper implements the concepts of BF and FOF, which have been
used in the 5ESS\textregistered-2000 switch project for several years. The FOF
model is similar to error seeding models in which predetermined
errors are planted in the code. The number of remaining errors can
be predicted based on the number of original errors seeded and the
number of both seeded and nonseeded errors found during
testing. The model may initiate a new approach to software quality
prediction, and it has the advantage of being independent of
testing intensity, methodology, and environment. The FOF model is
applicable to any software product in which BF and FOF rates can be
measured from source-code management systems.",
}
@Article{YuSD88,
author = "T.-J. Yu and V. Y. Shen and H. E. Dunsmore",
title = "An analysis of several software defect models",
journal = TSE,
year = 1988,
volume = 14,
number = 9,
pages = "1261--1270",
month = sep,
abstract =
"Results are presented of an analysis of several defect models using data
collected from two large commercial projects. Traditional models typically
use either program matrices (i.e. measurements from software products) or
testing time or combinations of these as independent variables. The
limitations of such models have been well-documented. The models considered
use the number of defects detected in the earlier phases of the development
process as the independent variable. This number can be used to predict the
number of defects to be detected later, even in modified software
products. A strong correlation between the number of earlier defects and
that of later ones was found. Using this relationship, a mathematical model
was derived which may be used to estimate the number of defects remaining
in software. This defect model may also be used to guide software
developers in evaluating the effectiveness of the software development and
testing processes.",
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Effectiveness of various testing strategies
%%%
@Article{1998:tse:frankl,
title = "Evaluating Testing Methods by Delivered Reliability",
author = "Phyllis G. Frankl and Richard G. Hamlet and Bev
Littlewood and Lorenzo Strigini",
pages = "586--601",
journal = TSE,
ISSN = "0098-5589",
year = "1998",
volume = "24",
month = aug,
number = "8",
abstract = "There are two main goals in testing software: 1) to
achieve adequate quality (\emph{debug testing}); the
objective is to probe the software for defects so that
these can be removed and 2) to assess existing quality
(\emph{operational testing}); the objective is to gain
confidence that the software is reliable. The names are
arbitrary, and most testing techniques address both
goals to some degree. However, debug methods tend to
ignore random selection of test data from an
operational profile, while for operational methods this
selection is all-important. Debug methods are thought,
without any real proof, to be good at uncovering
defects so that these can be repaired, but having done
so they do not provide a technically defensible
assessment of the reliability that results. On the
other hand, operational methods provide accurate
assessment, but may not be as useful for achieving
reliability. This paper examines the relationship
between the two testing goals, using a probabilistic
analysis. We define simple models of programs and their
testing, and try to answer theoretically the question
of how to attain program reliability: Is it better to
test by probing for defects as in debug testing, or to
assess reliability directly as in operational testing,
uncovering defects by accident, so to speak? There is
no simple answer, of course. Testing methods are
compared in a model where program failures are detected
and the software changed to eliminate them. The
``better'' method delivers higher reliability after all
test failures have been eliminated. This comparison
extends previous work, where the measure was the
probability of detecting a failure. Revealing special
cases are exhibited in which each kind of testing is
superior. Preliminary analysis of the distribution of
the delivered reliability indicates that even simple
models have unusual statistical properties, suggesting
caution in interpreting theoretical comparisons.",
keywords = "Reliability, debugging, software testing, statistical
testing theory",
correctedby = "\cite{1999:tse:frankl}",
note = "Special Section: International Conference on Software
Engineering (ICSE~'97)",
annote = "incomplete",
}
@Article{1999:tse:frankl,
title = "Correction to: Evaluating Testing Methods by Delivered
Reliability",
author = "Phyllis Frankl and Dick Hamlet and Bev Littlewood and
Lorenzo Strigini",
pages = "286",
journal = TSE,
ISSN = "0098-5589",
year = "1999",
volume = "25",
month = mar # "/" # apr,
number = "2",
corrects = "\cite{1998:tse:frankl}",
references = "\cite{1998:tse:frankl}",
annote = "checked",
}
@misc{ irvine-effectiveness,
author = "A. Irvine and A. Offutt",
title = "The Effectiveness of Category-Partition Testing of Object-Oriented Software",
text = "Alisa Irvine and A. Jefferson Offutt, The Effectiveness of Category-Partition
Testing of Object-Oriented Software, ISSE Department George Mason University,
Fairfax, VA 22030.",
url = "citeseer.nj.nec.com/irvine95effectiveness.html"
}
@misc{ tewary-empirical,
author = "A. Jefferson Offutt, Kanupriya Tewary",
title = "Empirical Comparisons of Data Flow and Mutation Testing",
url = "citeseer.nj.nec.com/offutt92empirical.html"
}
@misc{ offutt94experiments,
author = "A. Offutt and J. Pan and K. Tewary and T. Zhang",
title = "Experiments with data flow and mutation testing",
text = "A. J. Offutt, J. Pan, K. Tewary, and T. Zhang. Experiments with
data flow and mutation testing. Technical Report
ISSE-TR-94-105, Department of Information and Software Systems
Engineering, George Mason University, Fairfax, Virginia, 1994.",
year = "1994",
url = "citeseer.nj.nec.com/offutt94experiments.html"
}
@Article{HowdenH95,
author = "W. E. Howden and Yudong Huang",
title = "Software trustability analysis",
journal = TOSEM,
year = 1995,
volume = 4,
number = 1,
pages = "36--64",
month = jan
}
@InProceedings{BohmeP2014,
author = "B{\"o}hme, Marcel and Paul, Soumya",
authorASCII = "Bohme, Marcel and Paul, Soumya",
authorASCII = "Boehme, Marcel and Paul, Soumya",
title = "On the efficiency of automated testing",
crossref = "FSE2014",
pages = "632--642",
}
%%%
%%% Random testing
%%%
@InCollection{Hamlet94,
author = "Dick Hamlet",
title = "Random Testing",
booktitle = "Encyclopedia of Software Engineering",
publisher = "John Wiley and Sons",
year = 1994
}
@Article{1990:tse:hamlet,
title = "Partition Testing Does Not Inspire Confidence",
author = "Dick Hamlet and Ross Taylor",
pages = "1402--1411",
journal = TSE,
ISSN = "0098-5589",
year = "1990",
volume = "16",
month = dec,
number = "12",
referencedby = "\cite{1997:icse:bernot}, \cite{1997:icse:frankl},
\cite{1998:tse:frankl}, \cite{1999:tosem:podgurski}",
annote = "incomplete",
abstract = "Partition testing, in which a program's input domain is
divided according to some rule and tests conducted within
the subdomains, enjoys a good reputation. However,
comparison between testing that observes subdomain
boundaries and random sampling that ignores the partition
gives the counterintuitive result that partitioning is of
little value. In this paper we improve the negative
results published about partition testing, and try to
reconcile them with its intuitive value.
Theoretical models allow us to study partition testing in
the abstract, and to describe the circumstances under
which it should perform well at failure detection.
Partition testing is shown to be more valuable when the
partitions are narrowly based on expected failures and
there is a good chance that failures occur. For gaining
confidence from successful tests, partition testing as
usually practiced has little value."
}
@Article{1984:tse:duran,
title = "An Evaluation of Random Testing",
author = "Joe W. Duran and Simeon C. Ntafos",
pages = "438--444",
journal = TSE,
ISSN = "0098-5589",
year = "1984",
volume = "10",
month = jul,
number = "4",
referencedby = "\cite{1997:icse:bernot}, \cite{1997:icse:frankl},
\cite{1998:tse:frankl}, \cite{1999:tosem:podgurski}",
annote = "incomplete",
}
@Article{MillerMNPNMV92,
author = "Keith W. Miller and Larry J. Morell and Robert E. Noonan
and Stephen K. Park and David M. Nicol and Branson
W. Murrill and Jeffrey M. Voas",
title = "Estimating the probability of failure when testing reveals
no failures",
journal = TSE,
year = 1992,
volume = 18,
number = 1,
pages = "33--43",
month = jan
}
@Article{MillerFS90,
author = "Barton P. Miller and Louis Fredriksen and Bryan So",
title = "An empirical study of the reliability of {UNIX} utilities",
journal = CACM,
year = 1990,
volume = 33,
number = 12,
pages = "32--44",
month = dec,
doi = "https://doi.acm.org/10.1145/96267.96279",
}
@InProceedings{Xie2006,
author = "Tao Xie",
title = "Augmenting automatically generated unit-test suites with
regression oracle checking",
crossref = "ECOOP2006",
pages = "380--403",
}
@InProceedings{JiangZCT2009,
author = "Jiang, Bo and Zhang, Zhenyu and Chan, W. K. and Tse, T. H.",
title = "Adaptive Random Test Case Prioritization",
crossref = "ASE2009",
pages = "233--244",
}
@Article{SoaresGSM2010,
author = "Soares, Gustavo and Gheyi, Rohit and Serey, Dalton and Massoni, Tiago",
title = "Making program refactoring safer",
journal = IEEESoftware,
year = 2010,
volume = 27,
number = 4,
pages = "52--57",
month = jul # "/" # aug,
}
@InProceedings{KleinFF2010,
author = "Klein, Casey and Flatt, Matthew and Findler, Robert Bruce",
title = "Random testing for higher-order, stateful programs",
crossref = "OOPSLA2010",
pages = "555--566",
}
@InProceedings{PradelG2012,
author = "Pradel, Michael and Gross, Thomas R.",
title = "Leveraging test generation and specification mining for automated bug detection without false positives",
crossref = "ICSE2012",
pages = "288--298",
}
@InProceedings{GaneshLR2009,
author = "Ganesh, Vijay and Leek, Tim and Rinard, Martin",
title = "Taint-based directed whitebox fuzzing",
crossref = "ICSE2009",
pages = "474--484",
}
@InProceedings{TanMTL2012,
author = "Shin Hwei Tan and Darko Marinov and Lin Tan and Gary T. Leavens",
title = "{@tComment}: Testing {Javadoc} Comments to Detect Comment-Code Inconsistencies",
crossref = "ICST2012",
pages = "260--269",
}
@InProceedings{Garg2013,
author = "Garg, Pranav and Ivancic, Franjo and Balakrishnan, Gogul and Maeda, Naoto and Gupta, Aarti",
title = "Feedback-directed unit test generation for {C/C++} using concolic execution",
booktitle = "2013 35th International Conference on Software Engineering (ICSE)",
publisher = "IEEE",
year = 2013,
month = may,
pages = "132--141",
doi = "10.1109/ICSE.2013.6606559",
isbn = "978-1-4673-3076-3",
issn = "0270-5257",
abstract =
"In industry, software testing and coverage-based metrics are the predominant
techniques to check correctness of software. This paper addresses automatic
unit test generation for programs written in C/C++. The main idea is to
improve the coverage obtained by feedback-directed random test generation
methods, by utilizing concolic execution on the generated test drivers.
Furthermore, for programs with numeric computations, we employ non-linear
solvers in a lazy manner to generate new test inputs. These techniques
significantly improve the coverage provided by a feedback-directed random unit
testing framework, while retaining the benefits of full automation. We have
implemented these techniques in a prototype platform, and describe promising
experimental results on a number of C/C++ open source benchmarks.",
}
@Article{MajumdarN2018,
author = "Rupak Majumdar and Filip Niksic",
title = "Why is random testing effective for partition tolerance bugs?",
journal = PACMPL,
year = 2017,
volume = 2,
number = "POPL",
pages = "46:1--46:24",
month = dec,
articleno = 46,
numpages = 24,
}
@InProceedings{CumminsPML2018,
author = "Cummins, Chris and Petoumenos, Pavlos and Murray, Alastair and Leather, Hugh",
title = "Compiler fuzzing through deep learning",
crossref = "ISSTA2018",
pages = "95--105",
}
@InProceedings{LemieuxPSS2018,
author = "Lemieux, Caroline and Padhye, Rohan and Sen, Koushik and Song, Dawn",
title = "PerfFuzz: Automatically generating pathological inputs",
crossref = "ISSTA2018",
pages = "254--265",
}
@misc{randoop-tool-2020,
key = {Randoop},
year = {2020},
howpublished = "\url{https://github.com/randoop/randoop}"
}
% Cite with: \citepalias{randoop-tool-2020}
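% A minimal usage sketch (assuming the document loads natbib): define the
% alias once in the preamble, e.g.
%   \usepackage{natbib}
%   \defcitealias{randoop-tool-2020}{Randoop}
% after which \citepalias{randoop-tool-2020} renders the aliased text
% instead of the usual author-year label.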
@misc{randoop-issue-tracker-2020,
key = {Randoop issue tracker},
year = {2010--2020},
howpublished = "\url{https://github.com/randoop/randoop/issues}"
}
@misc{randoop-mailing-lists-2020,
key = {Randoop mailing lists},
year = {2010--2020},
howpublished =
"\url{https://groups.google.com/forum/\#!forum/randoop-developers} and \url{https://groups.google.com/forum/\#!forum/randoop-discuss}"
}
@misc{randoop-manual-4.2.3,
key = {Randoop Manual},
year = {2020},
month = mar,
note = "version 4.2.3",
howpublished =
"\url{https://randoop.github.io/randoop/manual/index.html}"
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Specification-based test suite generation
%%%
@PhdThesis{Meudec98,
author = "Christophe Meudec",
title = "Automatic Generation of Software Test Cases From Formal
Specifications",
school = "Queen's University of Belfast",
year = 1998,
OPTkey = "",
OPTtype = "",
OPTaddress = "",
OPTmonth = "",
OPTnote = "",
OPTannote = "",
abstract = "Software testing consumes a large percentage of total
software development costs. Yet, it is still usually
performed manually in a non rigorous fashion. While
techniques, and limited automatic support, for the
generation of test data from the actual code of the
system under test have been well researched, test cases
generation from a high level specification of the
intended behaviour of the system being developed has
hardly been addressed. In this thesis we present a
rationale for using tests derived from high level formal
specifications and then set to find an efficient
technique for the generation of adequate test sets from
specifications written in our study language, VDM-SL. In
this..."
}
@InProceedings{ChangR99,
author = "Juei Chang and Debra J. Richardson",
title = "Structural Specification-Based Testing: Automated
Support and Experimental Evaluation",
crossref = "FSE99",
pages = "285--302",
abstract = "In this paper, we describe a testing technique, called
structural specification-based testing (SST), which
utilizes the formal specification of a program unit as
the basis for test selection and test coverage
measurement. We also describe an automated testing tool,
called ADLscope, which supports SST for program units
specified in Sun Microsystems' Assertion Definition
Language (ADL). ADLscope automatically generates coverage
conditions from a program's ADL specification. While the
program is tested, ADLscope determines which of these
conditions are covered by the tests. An uncovered
condition exhibits aspects of the specification
inadequately exercised during testing. The tester uses
this information to develop new test data to exercise the
uncovered conditions.
\par
We provide an overview of SST's specification-based test
criteria and describe the design and implementation of
ADLscope. Specification-based testing is guided by a
specification, whereby the testing activity is directly
related to what a component under test is supposed to do,
rather than what it actually does. Specification-based
testing is a significant advance in testing, because it
is often more straightforward to accomplish and it can
reveal failures that are often missed by traditional
code-based testing techniques. As an initial evaluation
of the capabilities of specification-based testing, we
conducted an experiment to measure defect detection
capabilities, code coverage and usability of
SST/ADLscope; we report here on the results."
}
@InProceedings{ChangRS96:ISSTA,
author = "Juei Chang and Debra J. Richardson and Sriram Sankar",
title = "Structural Specification-based Testing with {ADL}",
OPTeditor = "Steven J. Ziel",
crossref = "ISSTA96",
pages = "62--70",
genterms = "DESIGN, VERIFICATION",
categories = "D.2.1 Software, SOFTWARE ENGINEERING,
Requirements/Specifications. D.2.5 Software, SOFTWARE
ENGINEERING, Testing and Debugging. F.3.1 Theory of
Computation, LOGICS AND MEANINGS OF PROGRAMS,
Specifying and Verifying and Reasoning about Programs,
Specification techniques.",
annote = "incomplete",
abstract = "This paper describes a specification-based black-box
technique for testing program units. The main
contribution is the method that we have developed to
derive test conditions, which are descriptions of test
cases, from the formal specification of each program
unit. The derived test conditions are used to guide test
selection and to measure comprehensiveness of existing
test suites. Our technique complements traditional
code-based techniques such as statement coverage and
branch coverage. It allows the tester to quickly develop
a black-box test suite.
\par
In particular, this paper presents techniques for
deriving test conditions from specifications written in
the Assertion Definition Language (ADL) [SH94], a
predicate logic-based language that is used to describe
the relationships between inputs and outputs of a program
unit. Our technique is fully automatable, and we are
currently implementing a tool based on the techniques
presented in this paper."
}
@TechReport{HayesS94,
author = "Roger Hayes and Sriram Sankar",
title = "Specifying and Testing Software Components using {ADL}",
institution = "Sun Microsystems Research",
year = 1994,
number = "TR-94-23",
address = "Palo Alto, CA, USA",
month = apr,
abstract = "This paper presents a novel approach to unit testing of
software components. This approach uses the specification
language ADL, that is particularly well-suited for
testing, to formally document the intended behavior of
software components. Another related language, TDD, is
used to systematically describe the test-data on which
the software components will be tested.
\par
This paper gives a detailed overview of the ADL language,
and a brief presentation of the TDD language. Some
details of the actual test system are also presented,
along with some significant results.",
URL = "http://www.sun.com/research/techrep/1994/smli_tr-94-23.ps"
}
@InProceedings{1989:tav:balcer,
author = "Marc J. Balcer and William M. Hasling and Thomas J.
Ostrand",
title = "Automatic Generation of Test Scripts from Formal Test
Specifications",
OMITeditor = "Richard A. Kemmerer",
crossref = "TAV89",
pages = "210--218",
genterms = "LANGUAGES, RELIABILITY, VERIFICATION",
categories = "I.2.2 Computing Methodologies, ARTIFICIAL
INTELLIGENCE, Automatic Programming, Program
transformation. D.2.5 Software, SOFTWARE ENGINEERING,
Testing and Debugging, Testing tools (e.g., data
generators, coverage testing).",
annote = "incomplete",
}
@InProceedings{RichardsonOT89:TAV,
author = "Debra J. Richardson and Owen O'Malley and Cindy
Tittle",
title = "Approaches to Specification-Based Testing",
IGNOREeditor = "Richard A. Kemmerer",
crossref = "TAV89",
pages = "86--96",
genterms = "LANGUAGES, RELIABILITY, VERIFICATION",
categories = "D.3.2 Software, PROGRAMMING LANGUAGES, Language
Classifications, Larch. D.2.1 Software, SOFTWARE
ENGINEERING, Requirements/Specifications, ANNA. D.2.1
Software, SOFTWARE ENGINEERING,
Requirements/Specifications, Languages. D.2.5 Software,
SOFTWARE ENGINEERING, Testing and Debugging. D.2.4
Software, SOFTWARE ENGINEERING, Software/Program
Verification, Validation.",
annote = "incomplete",
abstract = "Current software testing practices focus, almost
exclusively, on the implementation, despite widely
acknowledged benefits of testing based on software
specifications. We propose approaches to
specification-based testing by extending a wide variety of
implementation-based testing techniques to be applicable
to formal specification languages. We demonstrate these
approaches for the Anna and Larch specification languages."
}
@Article{Offutt:1999:DDR,
author = "A. Jefferson Offutt and Zhenyi Jin and Jie Pan",
title = "The dynamic domain reduction procedure for test data
generation",
journal = j-SPE,
volume = "29",
number = "2",
pages = "167--193",
month = feb,
year = "1999",
coden = "SPEXBL",
ISSN = "0038-0644",
bibdate = "Thu Jul 29 15:12:12 MDT 1999",
url = "http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=55000306&;PLACEBO=IE.pdf;
http://www3.interscience.wiley.com/cgi-bin/abstract?ID=55000306",
acknowledgement = ack-nhfb,
}
@Article{1991:jsi:offutt,
author = "A. Jefferson Offutt",
title = "An integrated automatic test data generation system",
journal = "Journal of Systems Integration",
volume = "1",
year = "1991",
number = "3",
month = nov,
pages = "391--409",
referencedby = "\cite{1992:tosem:offutt}, \cite{1993:tosem:demillo}",
annote = "incomplete",
abstract =
"The Godzilla automatic test data generator is an integrated collection of
tools that implements a relatively new test data generation method,
constraint-based testing, that is based on mutation
analysis. Constraint-based testing integrates mutation analysis with
several other testing techniques, including statement coverage, branch
coverage, domain perturbation and symbolic evaluation. Because Godzilla
uses a rule-based approach to generate test data, it is easily extendible
to allow new testing techniques to be integrated into the current
system. This paper describes the system that has been built to implement
constraint-based testing. Godzilla's design emphasizes orthogonality and
modularity, allowing relatively easy extensions. Godzilla's internal
structure and algorithms are described with emphasis on internal structures
of the system, and the engineering problems that were solved during the
implementation.",
}
@Article{OffuttL99,
author = "A. Jefferson Offutt and Shaoying Liu",
title = "Generating test data from {SOFL} specifications",
journal = "The Journal of Systems and Software",
volume = "49",
number = "1",
pages = "49--62",
day = "15",
month = dec,
year = "1999",
coden = "JSSODM",
ISSN = "0164-1212",
bibdate = "Tue Oct 10 10:06:05 MDT 2000",
url = "http://www.elsevier.nl/inca/publications/store/5/0/5/7/3/2/505732.pub.htt",
acknowledgement = ack-nhfb,
abstract = "Software testing can only be formalized and quantified
when a solid basis for test generation can be defined.
Tests are commonly generated from the source code,
control flow graphs, design representations, and
specifications/requirements. Formal specifications
represent a significant opportunity for testing because
they precisely describe what functions the software is
supposed to provide in a form that can be easily
manipulated. This paper presents a new method for
generating tests from formal specifications. This method
is comprehensive in specification coverage, applies at
several levels of abstraction, and can be highly
automated. The paper applies the method to SOFL
specifications, describes the technique, and demonstrates
the application on a case study. A preliminary evaluation
using a code-level coverage criterion (mutation testing),
indicates that the method can result in very effective
tests."
}
@TechReport{Burton99,
author = "Simon Burton",
title = "Towards automated unit testing of statechart implementations",
institution = "Department of Computer Science, University of York, UK",
year = "1999",
OPTkey = "",
OPTtype = "",
OPTnumber = "YCS 319",
OPTaddress = "",
OPTmonth = aug # "~2,",
OPTnote = "",
OPTannote = "",
url = "http://www.cs.york.ac.uk/ftpdir/reports/YCS-99-319.ps.gz",
abstract = "This report describes an automated method of unit test
design based on requirements specified in a subset of the
statechart notation. The behaviour under test is first
extracted from the requirements and specified in the Z
notation. Existing methods and tools are then applied to
this specification to derive the tests. Using Z to model
the requirements and specify the tests allows for a
deductive approach to verifying test satisfiability, test
result correctness and certain properties of the
requirements. An examination of the specification
coverage achieved by the tests is provided and the report
concludes with an evaluation of the work to date and a
set of directions for future work.",
}
@InProceedings{Donat97,
author = {Michael R. Donat},
title = {Automating Formal Specification-based Testing},
crossref = "TAPSOFT97",
pages = {833--847},
abstract = "This paper presents a technique for automatically generating
logical schemata that specify groups of black-box test cases
from formal specifications containing universal and
existential quantification. These schemata are called test
frames. Previous automated techniques have dealt with
languages based on propositional logic. Since this new
technique deals with quantification it can be applied to more
expressive specifications. This makes the technique
applicable to specifications written at the system
requirements level. The limitations imposed by quantification
are discussed. Industrial needs are addressed by the
capabilities of recognizing and augmenting existing test
frames and by accommodating a range of specification-coverage
schemes. The coverage scheme taxonomy introduced in this
paper provides a standard for controlling the number of test
frames produced. This technique is intended to automate
portions of what is done manually by practitioners. Basing
this technique on formal rules of logical derivation ensures
that the test frames produced are logical consequences of the
specification. It is expected that deriving test frames
automatically will offset the cost of developing a formal
specification. This tangible product makes formal
specification more economically feasible for industry."
}
@InProceedings{TothDJoyce96,
author = "Kalman C. Toth and Michael R. Donat and Jeffrey J. Joyce",
title = "Generating Test Cases From Formal Specifications",