% version-control.bib

% Bibliography about version control and merge conflicts


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Diff
%%%

@inproceedings{DBLP:conf/c++/Grass92,
  author    = {Grass, Judith E.},
  title     = {Cdiff: {A} Syntax Directed Differencer for {C++} Programs},
  booktitle = {Proceedings of the {C++} Conference. Portland, OR, USA, August 1992},
  year      = {1992},
  pages     = {181--194},
  biburl    = {https://dblp.org/rec/conf/c++/Grass92.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 15 May 2014 18:36:26 +0200},
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Diff -- line-based
%%%

@Article{NugrohoHM2020,
  author =       "Nugroho, Yusuf Sulistyo and Hata, Hideaki and Matsumoto, Kenichi",
  title =        "How different are different \emph{diff} algorithms in {Git}?",
  journal =      JEmpiricalSE,
  year =         2020,
  volume =       25,
  number =       1,
  pages =        "790--823",
  month =        jan,
  abstract =
   "Automatic identification of the differences between two versions of a file is a
    common and basic task in several applications of mining code repositories. Git,
    a version control system, has a diff utility and users can select algorithms of
    diff from the default algorithm Myers to the advanced Histogram algorithm. From
    our systematic mapping, we identified three popular applications of diff in
    recent studies. On the impact on code churn metrics in 14 Java projects, we
    obtained different values in 1.7\% to 8.2\% commits based on the different diff
    algorithms. Regarding bug-introducing change identification, we found 6.0\% and
    13.3\% in the identified bug-fix commits had different results of
    bug-introducing changes from 10 Java projects. For patch application, we found
    that the Histogram is more suitable than Myers for providing the changes of
    code, from our manual analysis. Thus, we strongly recommend using the Histogram
    algorithm when mining Git repositories to consider differences in source code.",
}


@Misc{Cohen2010,
  author       = {Cohen, Bram},
  title        = {Patience diff advantages},
  howpublished = {\url{https://bramcohen.livejournal.com/73318.html}},
  year         = 2010,
  month        = mar,
}


@Misc{PlumeLibMerging,
  author       = {Ernst, Michael D.},
  title        = {Plume-lib merging: merge drivers and merge tools},
  howpublished = {\url{https://github.com/plume-lib/merging}},
  year         = 2024,
  month        = sep,
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Diff -- tree-based
%%%


%%% Tree diff: Surveys

@InProceedings{Peters2005,
  author    = {Peters, Luuk},
  title     = {Change detection in {XML} trees: a survey},
  booktitle = {3rd Twente Student Conference on IT},
  address   = {Enschede, The Netherlands},
  month     = jun,
  year      = 2005,
}


%%% Tree diff: Papers other than surveys


@InProceedings{FalleriMBMM2014,
  author =       "Falleri, Jean-R{\'e}my and Morandat, Flor{\'e}al and Blanc, Xavier and Martinez, Matias and Monperrus, Martin",
  authorASCII =  "Falleri, Jean-Remy and Morandat, Floreal and Blanc, Xavier and Martinez, Matias and Monperrus, Martin",
  title =        "Fine-grained and accurate source code differencing",
  crossref =     "ASE2014",
  pages =        "313--324",
}


@inproceedings{10.1145/3238147.3238219,
author = {Huang, Kaifeng and Chen, Bihuan and Peng, Xin and Zhou, Daihong and Wang, Ying and Liu, Yang and Zhao, Wenyun},
title = {{ClDiff}: Generating Concise Linked Code Differences},
year = {2018},
isbn = {9781450359375},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3238147.3238219},
doi = {10.1145/3238147.3238219},
abstract = {Analyzing and understanding source code changes is important in a variety of software maintenance tasks. To this end, many code differencing and code change summarization methods have been proposed. For some tasks (e.g. code review and software merging), however, those differencing methods generate too fine-grained a representation of code changes, and those summarization methods generate too coarse-grained a representation of code changes. Moreover, they do not consider the relationships among code changes. Therefore, the generated differences or summaries make it not easy to analyze and understand code changes in some software maintenance tasks. In this paper, we propose a code differencing approach, named CLDIFF, to generate concise linked code differences whose granularity is in between the existing code differencing and code change summarization methods. The goal of CLDIFF is to generate more easily understandable code differences. CLDIFF takes source code files before and after changes as inputs, and consists of three steps. First, it pre-processes the source code files by pruning unchanged declarations from the parsed abstract syntax trees. Second, it generates concise code differences by grouping fine-grained code differences at or above the statement level and describing high-level changes in each group. Third, it links the related concise code differences according to five pre-defined links. Experiments with 12 Java projects (74,387 commits) and a human study with 10 participants have indicated the accuracy, conciseness, performance and usefulness of CLDIFF.},
booktitle = {Proceedings of the 33rd ACM/IEEE International Conference on Automated Software Engineering},
pages = {679--690},
numpages = {12},
keywords = {Code Differencing, Program Comprehension, AST},
location = {Montpellier, France},
series = {ASE '18}
}


@inproceedings{10.1007/11802167_104,
author = {Lee, Suk Kyoon and Kim, Dong Ah},
title = {{X-Tree Diff+}: efficient change detection algorithm in {XML} documents},
year = {2006},
isbn = {3540366792},
publisher = {Springer-Verlag},
address = {Berlin, Heidelberg},
url = {https://doi.org/10.1007/11802167_104},
doi = {10.1007/11802167_104},
abstract = {As web documents proliferate fast, the need for real-time computation of change (edit script) between web documents increases. Though fast heuristic algorithms have been proposed recently, the qualities of edit scripts produced by them are not satisfactory. In this paper, we propose X-tree Diff+ which produces better quality of edit scripts by introducing a tuning step based on the notion of consistency of matching. We also add copy operation to provide users more convenience. Tuning and copy operation increase matching ratio drastically. X-tree Diff+ produces better quality of edit scripts and runs fast equivalent to the time complexity of fastest heuristic algorithms},
booktitle = {Proceedings of the 2006 International Conference on Embedded and Ubiquitous Computing},
pages = {1037--1046},
numpages = {10},
location = {Seoul, Korea},
series = {EUC'06}
}


@InProceedings{LindholmKT2006,
  author =       "Lindholm, Tancred and Kangasharju, Jaakko and Tarkoma, Sasu",
  title =        "Fast and simple {XML} tree differencing by sequence alignment",
  crossref =     "DocEng2006",
  pages =        "75--84",
  abstract =
   "With the advent of XML we have seen a renewed interest in methods for
    computing the difference between trees. Methods that include heuristic
    elements play an important role in practical applications due to the inherent
    complexity of the problem. We present a method for differencing XML as ordered
    trees based on mapping the problem to the domain of sequence alignment,
    applying simple and efficient heuristics in this domain, and transforming back
    to the tree domain. Our approach provides a method to quickly compute changes
    that are meaningful transformations on the XML tree level, and includes
    subtree move as a primitive operation. We evaluate the feasibility of our
    approach and benchmark it against a selection of existing differencing
    tools. The results show our approach to be feasible and to have the potential
    to perform on par with tools of a more complex design in terms of both output
    size and execution time.",
}


@InProceedings{RimonS2005,
  author =       "Mikhaiel, Rimon and Stroulia, Eleni",
  title =        "Accurate and efficient {HTML} differencing",
  booktitle =    "STEP'05: 13th IEEE International Workshop on Software Technology and Engineering Practice",
  year =         2005,
  pages =        "163--172",
}


@InProceedings{CobenaAM2002,
  author =       "Cob{\'e}na, Gr{\'e}gory and Abiteboul, Serge and Marian, Am{\'e}lie",
  authorASCII =  "Cobena, Gregory and Abiteboul, Serge and Marian, Amelie",
  title =        "Detecting changes in {XML} documents",
  crossref =     "ICDE2002",
  pages =        "41--52",
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Diff -- graph-based
%%%

@InProceedings{ApiwattanapongOH2004,
  author       = {Apiwattanapong, Taweesup and Orso, Alessandro and Harrold, Mary Jean},
  title        = {A differencing algorithm for object-oriented programs},
  crossref     = {ASE2004},
  pages        = {2--13},
  supersededby = {ApiwattanapongOH2007},
  abstract     = {During software evolution, information about changes between
    different versions of a program is useful for a number of software
    engineering tasks. For many of these tasks, a purely syntactic differencing
    may not provide enough information for the task to be performed
    effectively. This problem is especially relevant in the case of
    object-oriented software, for which a syntactic change can have subtle and
    unforeseen effects. In this paper, we present a technique for comparing
    object-oriented programs that identifies both differences and
    correspondences between two versions of a program. The technique is based
    on a representation that handles object-oriented features and, thus, can
    capture the behavior of object-oriented programs. We also present JDIFF, a
    tool that implements the technique for Java programs, and empirical results
    that show the efficiency and effectiveness of the technique on a real
    program.},
}


@Article{ApiwattanapongOH2007,
  author    = {Apiwattanapong, Taweesup and Orso, Alessandro and Harrold, Mary Jean},
  title     = {{JD}iff: A differencing technique and tool for object-oriented programs},
  journal   = ASEjournal,
  volume    = 14,
  pages     = {3--36},
  month     = mar,
  year      = 2007,
  downloads = {http://dx.doi.org/10.1007/s10515-006-0002-0 DOI},
  usesDaikonAsTestSubject = 1,
  abstract  = {During software evolution, information about changes between
    different versions of a program is useful for a number of software
    engineering tasks. For example, configuration-management systems can use
    change information to assess possible conflicts among updates from
    different users. For another example, in regression testing, knowledge
    about which parts of a program are unchanged can help in identifying test
    cases that need not be rerun. For many of these tasks, a purely syntactic
    differencing may not provide enough information for the task to be
    performed effectively.  This problem is especially relevant in the case of
    object-oriented software, for which a syntactic change can have subtle and
    unforeseen effects. In this paper, we present a technique for comparing
    object-oriented programs that identifies both differences and
    correspondences between two versions of a program. The technique is based
    on a representation that handles object-oriented features and, thus, can
    capture the behavior of object-oriented programs. We also present JDiff, a
    tool that implements the technique for Java programs. Finally, we present
    the results of four empirical studies, performed on many versions of two
    medium-sized subjects, that show the efficiency and effectiveness of the
    technique when used on real programs.},
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Diff -- other representations
%%%

@InProceedings{JacksonL1994,
  author =       "Daniel Jackson and David A. Ladd",
  title =        "Semantic diff: a tool for summarizing the effects of modifications",
  crossref =     "ICSM94",
  pages =        "243--252",
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Merging
%%%

@Article{Mens2002,
  author =       "Tom Mens",
  title =        "A state-of-the-art survey on software merging",
  journal =      IEEETSE,
  year =         2002,
  volume =       28,
  number =       5,
  pages =        "449--462",
  doi =          "10.1109/TSE.2002.1000449",
}


@InProceedings{KudrjavetsNR2022,
  author =       "Kudrjavets, Gunnar and Nagappan, Nachiappan and Rastogi, Ayushi",
  title =        "Do Small Code Changes Merge Faster? A Multi-Language Empirical Investigation",
  crossref =     "MSR2022",
  pages =        "537--548",
  abstract =  {Code velocity, or the speed with which code changes are integrated into a production environment, plays a crucial role in Continuous Integration and Continuous Deployment. Many studies report factors influencing code velocity. However, solutions to increase code velocity are unclear. Meanwhile, the industry continues to issue guidelines on "ideal" code change size, believing it increases code velocity despite lacking evidence validating the practice. Surprisingly, this fundamental question has not been studied to date. This study investigates the practicality of improving code velocity by optimizing pull request size and composition (ratio of insertions, deletions, and modifications).
\par
We start with a hypothesis that a moderate correlation exists between pull request size and time-to-merge. We selected 100 most popular, actively developed projects from 10 programming languages on GitHub. We analyzed our dataset of 845,316 pull requests by size, composition, and context to explore its relationship to time-to-merge---a proxy to measure code velocity. Our study shows that pull request size and composition do not relate to time-to-merge. Regardless of the contextual factors that can influence pull request size or composition (e.g., programming language), the observation holds. Pull request data from two other platforms: Gerrit and Phabricator (401,790 code reviews) confirms the lack of relationship. This negative result as in "...eliminate useless hypotheses ..." [75] challenges a widespread belief by showing that small code changes do not merge faster to increase code velocity.},
}


@InProceedings{MenezesTPMPMC2020,
  author =       "Menezes, Jos{\'e} William and Trindade, Bruno and Pimentel, Jo{\~a}o Felipe and Moura, Tayane and Plastino, Alexandre and Murta, Leonardo and Costa, Catarina",
  title =        "What Causes Merge Conflicts?",
  crossref =     "SBES2020",
  pages =        "203--212",
  abstract =  {During the software development process, several developers commonly change artifacts in parallel. A merge process can combine parallel changes. In the case of changes that cannot be automatically combined, the developer responsible for the merge must reconcile decisions and resolve conflicts. Some studies are concerned with investigating ways to deal with merge conflicts and measuring the effort that this activity may require. However, the investigation of factors that may reduce the occurrence of conflicts needs more and deeper attention. This paper aims at identifying and analyzing attributes of past merges with and without conflicts to understand what may induce physical conflicts. We analyzed 182,273 merge scenarios from 80 projects written in eight different programming languages to find characteristics that increase the chances of a merge to have a conflict. We found that attributes such as the number of committers, the number of commits, and the number of changed files seem to have the biggest influence in the occurrence of merge conflicts. Moreover, attributes in the branch that is being integrated seem to be more influential than the same attributes in the other branch. Additionally, we discovered positive correlations between the occurrence of conflicts and both the duration of the branch and the intersection of developers.},
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Merging -- line-based
%%%


@Article{PerrySV2001,
  author =       "Perry, Dewayne E. and Siy, Harvey P. and Votta, Lawrence G.",
  title =        "Parallel changes in large-scale software development: an observational case study",
  journal =      TOSEM,
  year =         2001,
  volume =       10,
  number =       3,
  pages =        "308--337",
  month =        jul,
  abstract =  {An essential characteristic of large-scale software development is parallel development by teams of developers. How this parallel development is structured and supported has a profound effect on both the quality and timeliness of the product. We conduct an observational case study in which we collect and analyze the change and configuration management history of a legacy system to delineate the boundaries of, and to understand the nature of, the problems encountered in parallel development. The results of our studies are (1) that the degree of parallelism is very high -- higher than considered by tool builders; (2) there are multiple levels of parallelism, and the data for some important aspects are uniform and consistent for all levels; (3) the tails of the distributions are long, indicating the tail, rather than the mean, must receive serious attention in providing solutions for these problems; and (4) there is a significant correlation between the degree of parallel work on a given component and the number of quality problems it has. Thus, the results of this study are important both for tool builders and for process and project engineers.},
}


@Article{GhiottoMBvdH2020,
  author =       "Ghiotto, Gleiph and Murta, Leonardo and Barros, M{\'a}rcio and van der Hoek, Andr{\'e}",
  authorUTF =    "Ghiotto, Gleiph and Murta, Leonardo and Barros, Márcio and van der Hoek, André",
  authorASCII =  "Ghiotto, Gleiph and Murta, Leonardo and Barros, Marcio and van der Hoek, Andre",
  title =        "On the Nature of Merge Conflicts: A Study of 2,731 Open Source {Java} Projects Hosted by {GitHub}",
  journal =      IEEETSE,
  year =         2020,
  volume =       46,
  number =       8,
  pages =        "892--915",
  month =        aug,
}


@Article{SeibtHCBA2022,
  author =       "Seibt, Georg and Heck, Florian and Cavalcanti, Guilherme and Borba, Paulo and Apel, Sven",
  title =        "Leveraging Structure in Software Merge: An Empirical Study",
  journal =      IEEETSE,
  year =         2022,
  volume =       48,
  number =       11,
  pages =        "4590--4610",
}


@InProceedings{NguyenI2017,
  author    = {Nguyen, Hoai Le and Ignat, Claudia-Lavinia},
  title     = {Parallelism and conflicting changes in {Git} version control systems},
  booktitle = {IWCES'17 - The Fifteenth International Workshop on Collaborative Editing Systems},
  address   = {Portland, OR, USA},
  month     = feb,
  year      = 2017,
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Merging -- tree-based (including "structured" and "semi-structured")
%%%


@InProceedings{Asklund1994,
  author =       "Ulf Asklund",
  title =        "Identifying conflicts during structural merge",
  booktitle =    "NWPER 1994, Nordic Workshop on Programming Environment Research",
  year =         1994,
  pages =        "231--242",
  month =        jun,
}


@InProceedings{HuntT2002,
  author =       "James J. Hunt and Walter F. Tichy",
  title =        "Extensible language-aware merging",
  crossref =     "ICSM2002",
  pages =        "511--520",
}


@InProceedings{ApelLBLK2011,
  author      = {Sven Apel and J{\"o}rg Liebig and Benjamin Brandl and Christian Lengauer and Christian K{\"a}stner},
  authorASCII = {Sven Apel and Jorg Liebig and Benjamin Brandl and Christian Lengauer and Christian Kastner},
  title       = {Semistructured Merge: Rethinking Merge in Revision Control Systems},
  pages       = {190--200},
  crossref    = {FSE2011},
}


@InProceedings{TrindadeTavaresBCS2019,
  author =       "Trindade Tavares, Alberto and Borba, Paulo and Cavalcanti, Guilherme and Soares, S{\'e}rgio",
  authorUTF =    "Trindade Tavares, Alberto and Borba, Paulo and Cavalcanti, Guilherme and Soares, Sérgio",
  authorASCII =  "Trindade Tavares, Alberto and Borba, Paulo and Cavalcanti, Guilherme and Soares, Sergio",
  title =        "Semistructured Merge in {JavaScript} Systems",
  crossref =     "ASE2019",
  pages =        "1014--1025",
}


@InProceedings{ApelLL2012,
  author =       "Apel, Sven and Le{\ss}enich, Olaf and Lengauer, Christian",
  authorUTF =    "Apel, Sven and Leßenich, Olaf and Lengauer, Christian",
  authorASCII =  "Apel, Sven and Lessenich, Olaf and Lengauer, Christian",
  title =        "Structured merge with auto-tuning: balancing precision and performance",
  crossref =     "ICSE2012",
  pages =        "120--129",
}


@Article{LessenichAL2014,
  author =       "Olaf Le{\ss}enich and Sven Apel and Christian Lengauer",
  authorASCII =  "Olaf Lessenich and Sven Apel and Christian Lengauer",
  title =        "Balancing precision and performance in structured merge",
  journal =      JASE,
  year =         2014,
  volume =       22,
  number =       3,
  pages =        "367--397",
  month =        may,
}


@InProceedings{CavalcantiBA2017,
  author =       "Cavalcanti, Guilherme and Borba, Paulo and Accioly, Paola",
  title =        "Evaluating and improving semistructured merge",
  crossref =     "OOPSLA2017",
  pages =        "59:1--59:27",
}


@InProceedings{AsenovGMO2017,
  author =       "Asenov, Dimitar and Guenat, Balz and M{\"u}ller, Peter and Otth, Martin",
  title =        "Precise Version Control of Trees with Line-Based Version Control Systems",
  crossref =     "FASE2017",
  pages =        "152--169",
  abstract =
   "Version control of tree structures, ubiquitous in software engineering, is
    typically performed on a textual encoding of the trees, rather than the trees
    directly. Applying standard line-based diff and merge algorithms to such
    encodings leads to inaccurate diffs, unnecessary conflicts, and incorrect
    merges. To address these problems, we propose novel algorithms for computing
    precise diffs between two versions of a tree and for three-way merging of
    trees. Unlike most other approaches for version control of structured data,
    our approach integrates with mainstream version control systems. Our merge
    algorithm can be customized for specific application domains to further
    improve merge results. An evaluation of our approach on abstract syntax trees
    from popular Java projects shows substantially improved merge results
    compared to Git.",
}


@inproceedings{10.1145/1860559.1860600,
author = {Vion-Dury, Jean-Yves},
title = {Diffing, Patching and Merging {XML} Documents: Toward a Generic Calculus of Editing Deltas},
year = {2010},
isbn = {9781450302319},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/1860559.1860600},
doi = {10.1145/1860559.1860600},
abstract = {This work addresses what we believe to be a central issue in the field of XML diff and merge computation: the mathematical modeling of the so-called "editing deltas" and the study of their formal abstract properties. We expect at least three outputs from this theoretical work: a common basis to compare performances of the various algorithms through a structural normalization of deltas, a universal and flexible patch application model and a clearer separation of patch and merge engine performance from delta generation performance. Moreover, this work could inspire technical approaches to combine heterogeneous engines thank to sound delta transformations. This short paper reports current results, discusses key points and outlines some perspectives.},
booktitle = {Proceedings of the 10th ACM Symposium on Document Engineering},
pages = {191--194},
numpages = {4},
keywords = {tree transformation, tree-to-tree correction, version control, XML, tree edit distance},
location = {Manchester, United Kingdom},
series = {DocEng '10}
}


@inproceedings{10.1145/2494266.2494277,
author = {Ba, M. Lamine and Abdessalem, Talel and Senellart, Pierre},
title = {Uncertain Version Control in Open Collaborative Editing of Tree-Structured Documents},
year = {2013},
isbn = {9781450317894},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/2494266.2494277},
doi = {10.1145/2494266.2494277},
abstract = {In order to ease content enrichment, exchange, and sharing, web-scale collaborative platforms such as Wikipedia or Google Docs enable unbounded interactions between a large number of contributors, without prior knowledge of their level of expertise and reliability. Version control is then essential for keeping track of the evolution of the shared content and its provenance. In such environments, uncertainty is ubiquitous due to the unreliability of the sources, the incompleteness and imprecision of the contributions, the possibility of malicious editing and vandalism acts, etc. To handle this uncertainty, we use a probabilistic XML model as a basic component of our version control framework. Each version of a shared document is represented by an XML tree and the whole document, together with its different versions, is modeled as a probabilistic XML document. Uncertainty is evaluated using the probabilistic model and the reliability measure associated to each source, each contributor, or each editing event, resulting in an uncertainty measure on each version and each part of the document. We show that standard version control operations can be implemented directly as operations on the probabilistic XML model; efficiency with respect to deterministic version control systems is demonstrated on real-world datasets.},
booktitle = {Proceedings of the 2013 ACM Symposium on Document Engineering},
pages = {27--36},
numpages = {10},
keywords = {xml, collaborative work, uncertain data, version control},
location = {Florence, Italy},
series = {DocEng '13}
}


@Article{LarsenFBM2023,
  author =       "Lars{\'e}n, Simon and Falleri, Jean-R{\'e}my and Baudry, Benoit and Monperrus, Martin",
  title =        "Spork: Structured Merge for {Java} with Formatting Preservation",
  journal =      IEEETSE,
  year =         2023,
  volume =       49,
  number =       1,
  pages =        "64--83",
  month =        jan,
}


@InProceedings{ZHY2019,
  author =       "Fengmin Zhu and Fei He and Qianshan Yu",
  title =        "Enhancing precision of structured merge by proper tree matching",
  crossref =     "ICSECompanion2019",
  pages =        "286--287",
}


@Misc{Hume2017,
  author       = {Hume, Tristan},
  title        = {Designing a tree diff algorithm using dynamic programming and {A*}},
  year         = 2017,
  howpublished = {\url{https://thume.ca/2017/06/17/tree-diffing/}},
}


@InProceedings{Lindholm2004,
  author    = {Lindholm, Tancred},
  title     = {A three-way merge for {XML} documents},
  booktitle = {DocEng},
  year      = 2004,
  NEEDpages = {*},
}


@InProceedings{BakaoukasB2020,
  author =       "Bakaoukas, Anastasios G. and Bakaoukas, Nikolaos G.",
  title =        "A Top-Down Three-Way Merge Algorithm for {HTML/XML} Documents",
  booktitle =    "IntelliSys 2020: Intelligent Systems and Applications: Proceedings of the 2020 Intelligent Systems Conference",
  year =         2020,
  pages =        "75--96",
  abstract =
   "Collaborative work, with the need to keep HTML/XML code up-to-date, is now
    becoming vital particularly in the Web Development field. In order to fully
    support collaborative work and resolve related problems the need has arisen
    for an optimum solution to the automated editing of a number of parallel
    copies originating from a single original HTML/XML code document with the
    additional requirement to subsequently merge the copies into a single updated
    document. A number of algorithms have been used in the past for the purpose,
    such as: Diff3, XmlDiff, DeltaXML {\&} 3DM, but HTML/XML code complexity
    related issues have now called for an algorithm that is more specifically
    designed for the purpose. In this paper a new algorithmic approach to merging
    HTML/XML code documents is presented that is based on the ``Three-way Merge''
    approach and the ``Node-per-Node'' comparison between ordered trees. For the
    creation of the actual merging function operating at the heart of the
    algorithm, a particular methodology was followed in which only the two
    ``Current Versions'' are required for the generation of the updated document,
    and no involvement of the ``Original Document'' is necessary. This is an
    important improvement over the currently existing algorithms because
    eliminates the well-known ``Idempotent'' problem in merging two HTML/XML
    documents. In addition, the algorithmic approach presented here allows for the
    identification and treatment of all the conflicts that arise during a HTML/XML
    code merging in an ordered and clearly specified manner.",
}


@InProceedings{SkafMolliMRN2008,
  author =       "Skaf-Molli, Hala and Molli, Pascal and Rahhal, Charbel and Naja-Jazzar, Hala",
  title =        "Collaborative Writing of {XML} Documents",
  pages =        "1--6",
  crossref =     "ICTTA2008",
  year =         2008,
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Merging -- graph-based
%%%

@InProceedings{HorwitzPR88a,
  author   = {Horwitz, Susan and Prins, Jan and Reps, Thomas},
  title    = {Integrating non-interfering versions of programs},
  pages    = {146--157},
  crossref = {POPL88},
}


@Article{HorwitzPR89:TOPLAS,
  author  = {Horwitz, Susan and Prins, Jan and Reps, Thomas},
  title   = {Integrating non-interfering versions of programs},
  journal = toplas,
  volume  = 11,
  number  = 3,
  pages   = {345--387},
  month   = jul,
  year    = 1989,
}


@TechReport{Binkley91,
  author =       "David W. Binkley",
  title =        "Multi-Procedure Program Integration",
  institution =  UWMadison,
  year =         1991,
  number =       1038,
  month =        aug,
}


@inproceedings{10.1145/1826147.1826154,
author = {Koegel, Maximilian and Herrmannsdoerfer, Markus and von Wesendonk, Otto and Helming, Jonas},
title = {Operation-Based Conflict Detection},
year = {2010},
isbn = {9781605589602},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/1826147.1826154},
doi = {10.1145/1826147.1826154},
abstract = {In recent years, models are increasingly used throughout the entire lifecycle in software engineering projects. In effect, the need for collaboration and for management of change on these models emerged. Traditionally, Software Configuration Management (SCM) systems are employed to facilitate collaboration on software engineering artifacts and to control change to these artifacts. For scalability and to support offline operation, most of these systems employ optimistic concurrency control and therefore require methods to detect concurrent change---also known as conflict detection. However, many researchers have shown that existing approaches for SCM systems do not work well on graph-like models, since they are geared towards textual artifacts and do not take the graph structure of models into account. The approaches for conflict detection in these systems show many false positives, since they require a merge every time the same configuration item --- in this case the same file --- is changed. In this paper, we propose operation-based conflict detection, which detects conflicts directly on the operations that change the model. We compare operation-based conflict detection to file-based conflict detection in a multi-case study and show that operation-based conflict detection results in less conflicts and therefore requires fewer merges.},
booktitle = {Proceedings of the 1st International Workshop on Model Comparison in Practice},
pages = {21--30},
numpages = {10},
keywords = {conflict detection, model merging, software configuration management, operation-based, version control},
location = {Malaga, Spain},
series = {IWMCP '10}
}


@inproceedings{10.1109/CVSM.2009.5071721,
author = {Koegel, Maximilian and Helming, Jonas and Seyboth, Stephan},
title = {Operation-Based Conflict Detection and Resolution},
year = {2009},
isbn = {9781424437146},
publisher = {IEEE Computer Society},
address = {USA},
url = {https://doi.org/10.1109/CVSM.2009.5071721},
doi = {10.1109/CVSM.2009.5071721},
abstract = {Models are in wide-spread use in the software development lifecycle and model-driven development even promotes them from an abstraction of the system to the description the system is generated from. Therefore it is increasingly important to collaborate on models. These models can range from requirements models over UML models to project management models such as schedules. Tool support for collaboration on models is therefore crucial. Traditionally Software Configuration Management (SCM) systems such as RCS [9] or Subversion [10] have supported this task for textual artifacts such as source code on the granularity of files and textual lines. They do not work well for graph-like models with many links since the granularity needed to support them is on the level of model elements and their attributes. For the design of a novel SCM system addressing these requirements it is essential to define how conflicts on models are detected and how they can be resolved. In this paper we present an approach to conflict detection and resolution on models. We employ operation-based change tracking and therefore detect conflicts based on operations. For conflict resolution we propose an integration of SCM with techniques from Rational Management to effectively resolve conflicts.},
booktitle = {Proceedings of the 2009 ICSE Workshop on Comparison and Versioning of Software Models},
pages = {43--48},
numpages = {6},
series = {CVSM '09}
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Merging -- neural, deep learning
%%%

@Article{DinellaMSBNL2023,
  author = 	 "Dinella, Elizabeth and Mytkowicz, Todd and Svyatkovskiy, Alexey and Bird, Christian and Naik, Mayur and Lahiri, Shuvendu",
  title = 	 "{DeepMerge}: Learning to merge programs",
  journal = 	 TSE,
  year = 	 2023,
  volume = 	 49,
  number = 	 4,
  pages = 	 "1599--1614",
  month = 	 apr,
  abstract =
   "In collaborative software development, program merging is \emph{the}
    mechanism to integrate changes from multiple programmers. Merge algorithms
    in modern version control systems report a conflict when changes interfere
    textually. Merge conflicts require manual intervention and frequently stall
    modern continuous integration pipelines. Prior work found that, although
    costly, a large majority of resolutions involve re-arranging text without
    writing any new code. Inspired by this observation we propose the
    \emph{first data-driven approach} to resolve merge conflicts with a machine
    learning model. We realize our approach in a tool DeepMerge that uses a
    novel combination of (i) an edit-aware embedding of merge inputs and (ii) a
    variation of pointer networks, to construct resolutions from input
    segments. We also propose an algorithm to localize manual resolutions in a
    resolved file and employ it to curate a ground-truth dataset comprising
    8,719 non-trivial resolutions in JavaScript programs. Our evaluation shows
    that, on a held out test set, DeepMerge can predict correct resolutions for
    37\% of non-trivial merges, compared to only 4\% by a state-of-the-art
    semistructured merge technique. Furthermore, on the subset of merges with
    upto 3 lines (comprising 24\% of the total dataset), DeepMerge can predict
    correct resolutions with 78\% accuracy.",
}


@InProceedings{SvyatkovskiyFGMDBJSL2022,
  author = 	 "Svyatkovskiy, Alexey and Fakhoury, Sarah and Ghorbani, Negar and Mytkowicz, Todd and Dinella, Elizabeth and Bird, Christian and Jang, Jinu and Sundaresan, Neel and Lahiri, Shuvendu K.",
  title = 	 "Program Merge Conflict Resolution via Neural Transformers",
  crossref = "FSE2022",
  pages = 	 "822--833",
  abstract =
   "Collaborative software development is an integral part of the modern
    software development life cycle, essential to the success of large-scale
    software projects. When multiple developers make concurrent changes around
    the same lines of code, a merge conflict may occur. Such conflicts stall
    pull requests and continuous integration pipelines for hours to several
    days, seriously hurting developer productivity. To address this problem, we
    introduce MergeBERT, a novel neural program merge framework based on
    token-level three-way differencing and a transformer encoder model. By
    exploiting the restricted nature of merge conflict resolutions, we
    reformulate the task of generating the resolution sequence as a
    classification task over a set of primitive merge patterns extracted from
    real-world merge commit data. Our model achieves 63-68\% accuracy for merge
    resolution synthesis, yielding nearly a 3\texttimes{} performance
    improvement over existing semi-structured, and 2\texttimes{} improvement
    over neural program merge tools. Finally, we demonstrate that MergeBERT is
    sufficiently flexible to work with source code files in Java, JavaScript,
    TypeScript, and C\# programming languages. To measure the practical use of
    MergeBERT, we conduct a user study to evaluate MergeBERT suggestions with
    25 developers from large OSS projects on 122 real-world conflicts they
    encountered. Results suggest that in practice, MergeBERT resolutions would
    be accepted at a higher rate than estimated by automatic metrics for
    precision and accuracy. Additionally, we use participant feedback to
    identify future avenues for improvement of MergeBERT.",
}


@article{10.1145/3688155,
author = {Lahiri, Shuvendu and Svyatkovskiy, Alexey and Bird, Christian and Meijer, Erik and Coatta, Terry},
title = {Program Merge: What's Deep Learning Got to Do with It? A discussion with Shuvendu Lahiri, Alexey Svyatkovskiy, Christian Bird, Erik Meijer and Terry Coatta},
year = {2024},
issue_date = {July/August 2024},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = {22},
number = {4},
issn = {1542-7730},
url = {https://doi.org/10.1145/3688155},
doi = {10.1145/3688155},
abstract = {If you regularly work with open-source code or produce software for a large organization, you're already familiar with many of the challenges posed by collaborative programming at scale. Some of the most vexing of these tend to surface as a consequence of the many independent alterations inevitably made to code, which, unsurprisingly, can lead to updates that don't synchronize. Difficult merges are nothing new, of course, but the scale of the problem has gotten much worse. This is what led a group of researchers at MSR (Microsoft Research) to take on the task of complicated merges as a grand program-repair challenge, one they believed might be addressed at least in part by machine learning.},
journal = {Queue},
month = sep,
pages = {101--119},
numpages = {19}
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Merging -- other representations
%%%


@article{10.1145/3276535,
author = {Sousa, Marcelo and Dillig, Isil and Lahiri, Shuvendu K.},
title = {Verified Three-Way Program Merge},
year = {2018},
issue_date = {November 2018},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = {2},
number = {OOPSLA},
url = {https://doi.org/10.1145/3276535},
doi = {10.1145/3276535},
abstract = {Even though many programmers rely on 3-way merge tools to integrate changes from different branches, such tools can introduce subtle bugs in the integration process. This paper aims to mitigate this problem by defining a semantic notion of conflict-freedom, which ensures that the merged program does not introduce new unwanted behaviors. We also show how to verify this property using a novel, compositional algorithm that combines lightweight summarization for shared program fragments with precise relational reasoning for the modifications. Towards this goal, our method uses a 4-way differencing algorithm on abstract syntax trees to represent different program versions as edits applied to a shared program with holes. This representation allows our verification algorithm to reason about different edits in isolation and compose them to obtain an overall proof of conflict freedom. We have implemented the proposed technique in a new tool called SafeMerge for Java and evaluate it on 52 real-world merge scenarios obtained from GitHub. The experimental results demonstrate the benefits of our approach over syntactic conflict-freedom and indicate that SafeMerge is both precise and practical.},
journal = {Proc. ACM Program. Lang.},
month = oct,
articleno = {165},
numpages = {29},
keywords = {Three-way program merge, product programs, relational verification}
}


@Article{Berzins1994,
  author = 	 "Berzins, Valdis",
  title = 	 "Software Merge: Semantics of Combining Changes to Programs",
  journal = 	 TOPLAS,
  year = 	 1994,
  volume = 	 16,
  number = 	 6,
  pages = 	 "1875--1903",
  month = 	 nov,
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% New version control systems
%%%

% The RaoS2016 entry later in this file appears to describe the same paper
% (same authors, title, article number, and page count); this record carries
% the abstract and keywords.
@inproceedings{10.1145/2905055.2905072,
  author    = {Rao, N. Rama and Sekharaiah, K. Chandra},
  title     = {A Methodological Review Based Version Control System with Evolutionary Research for Software Processes},
  booktitle = {Proceedings of the Second International Conference on Information and Communication Technology for Competitive Strategies},
  series    = {ICTCS '16},
  location  = {Udaipur, India},
  year      = {2016},
  isbn      = {9781450339629},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  articleno = {14},
  numpages  = {6},
  doi       = {10.1145/2905055.2905072},
  url       = {https://doi.org/10.1145/2905055.2905072},
  keywords  = {Source code management(SCM), CVS, Git, SVN, Mercurial, Distributed version control system(DVCS), TFS, Version control system(VCS)},
  abstract  = {The constant add up of new features and bug fixes often make invariable changes and refinements inevitable in the software development at regular intervals. The software should enable the end-user to decipher the critical version information about the minutest detail in every software artifact and if it does not, no software can be called as complete or nor is it user-friendly. When developers check in the software files they have to put in the version field manually as a part of header information inside the file. It would be difficult to identify vulnerabilities in the software when the files lack embedded version information which makes it harder for the end users to accurately identify the running version of the software. How can version control system (VCS) help in optimizing the bug traceability time and bug fixing potential? Version control system (VCS) is a software application that helps in collaborative software development of software projects. In order to support this instance, we should analyze which version control system is to be considered and whether to use central repository version control system or local repository version control system. Various functional, non functional evaluation criteria and fundamental requirements like installation, configuration, learning curve, performance, commits, branching, merging, and tagging have been considered for a through comparison of many version control systems. In this paper, we review and analyze the currently used VCS that involved in a view of a collaborative software development for software evolution. And an attempt has also been made to identify the version information in every deliverable file or retain it with the source. We demonstrate and perform survey on current versioning systems like Concurrent Version Control System (CVS), Subversion (SVN), Team Foundation Server (TFS), Git, ACME, Visual Source Safe(VSS), Mercurial and Clear Case, etc. that offers opportunities to test produced results and validate.}
}


@InProceedings{CavalcantiBSA2019,
  author = 	 "Guilherme Cavalcanti and Paulo Borba and Georg Seibt and Sven Apel",
  title = 	 "The impact of structure on software merging: Semistructured versus structured merge",
  crossref =  "ASE2019",
  pages = 	 "1002--1013",
  abstract =
   "Merge conflicts often occur when developers concurrently change
    the same code artifacts. While state of practice unstructured merge tools
    (e.g Git merge) try to automatically resolve merge conflicts based on textual
    similarity, semistructured and structured merge tools try to go further by
    exploiting the syntactic structure and semantics of the artifacts
    involved. Although there is evidence that semistructured merge has
    significant advantages over unstructured merge, and that structured merge
    reports significantly fewer conflicts than unstructured merge, it is unknown
    how semistructured merge compares with structured merge. To help developers
    decide which kind of tool to use, we compare semistructured and structured
    merge in an empirical study by reproducing more than 40,000 merge scenarios
    from more than 500 projects. In particular, we assess how often the two merge
    strategies report different results, we identify conflicts incorrectly
    reported by one but not by the other (false positives), and conflicts
    correctly reported by one but missed by the other (false negatives). Our
    results show that semistructured and structured merge differ in 24\% of the
    scenarios with conflicts. Semistructured merge reports more false positives,
    whereas structured merge has more false negatives. Finally, we found that
    adapting a semistructured merge tool to resolve a particular kind of conflict
    makes semistructured and structured merge even closer.",
}


% The Cavalcanti2017 entry later in this file appears to describe the same
% paper (same author, title, and pages); this record carries the abstract.
@inproceedings{10.1145/3135932.3135943,
author = {Cavalcanti, Guilherme},
title = {What Merge Tool Should I Use?},
year = {2017},
isbn = {9781450355148},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3135932.3135943},
doi = {10.1145/3135932.3135943},
abstract = {While unstructured merge tools try to automatically resolve merge conflicts via textual similarity, semistructured and structured merge tools try to go further by exploiting the syntactic structure and semantics of the involved artefacts. Previous studies compare these merge approaches with respect to the number of reported conflicts, showing, for most projects and merge situations, a reduction in favor of semistructured and structured merge. However, these studies do not investigate whether this reduction actually leads to integration effort reduction (Productivity) without negative impact on the correctness of the merging process (Quality). To analyze this, and to better understand how these tools could be improved, we propose empirical studies to identify spurious conflicts (false positives) reported by one approach but not by the other, and interference reported as conflict by one approach but missed by the other (false negatives).},
booktitle = {Proceedings Companion of the 2017 ACM SIGPLAN International Conference on Systems, Programming, Languages, and Applications: Software for Humanity},
pages = {19--20},
numpages = {2},
keywords = {version control systems, empirical studies, software merging, collaborative development},
location = {Vancouver, BC, Canada},
series = {SPLASH Companion 2017}
}


@InProceedings{TavaresBCS2020,
  author = 	 "Tavares, Alberto Trindade and Borba, Paulo and Cavalcanti, Guilherme and Soares, S{\'e}rgio",
  title = 	 "Semistructured Merge in {JavaScript} Systems",
  crossref =  "ASE2020",
  pages = 	 "1014--1025",
  abstract =  {Industry widely uses unstructured merge tools that rely on textual analysis to detect and resolve conflicts between code contributions. Semistructured merge tools go further by partially exploring the syntactic structure of code artifacts, and, as a consequence, obtaining significant merge accuracy gains for Java-like languages. To understand whether semistructured merge and the observed gains generalize to other kinds of languages, we implement two semistructured merge tools for JavaScript, and compare them to an unstructured tool. We find that current semistructured merge algorithms and frameworks are not directly applicable for scripting languages like JavaScript. By adapting the algorithms, and studying 10,345 merge scenarios from 50 JavaScript projects on GitHub, we find evidence that our JavaScript tools report fewer spurious conflicts than unstructured merge, without compromising the correctness of the merging process. The gains, however, are much smaller than the ones observed for Java-like languages, suggesting that semistructured merge advantages might be limited for languages that allow both commutative and non-commutative declarations at the same syntactic level.},
}


@inproceedings{10.1145/3555228.3555229,
author = {Campos Junior, Heleno de S. and de Menezes, Gleiph Ghiotto L. and Barros, M{\'a}rcio de Oliveira and van der Hoek, Andr{\'e} and Murta, Leonardo Gresta Paulino},
title = {Towards Merge Conflict Resolution by Combining Existing Lines of Code},
year = {2022},
isbn = {9781450397353},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3555228.3555229},
doi = {10.1145/3555228.3555229},
abstract = {Software developers often need to combine their contributions. This operation is called merge. When the contributions happen at the same physical region in the source code, the merge is marked as conflicting and must be manually resolved by the developers. Existing studies explore why conflicts happen, their characteristics, and how they are resolved. In this paper, we investigate a specific subset of merge conflicts, which may be resolved using a combination of existing lines. We analyze 10,177 conflict chunks of popular projects that were resolved by combining existing lines, aiming at characterizing and finding patterns that developers frequently use to resolve them. We found that these conflicting chunks and their resolutions are usually small (they have a median of 6 LOC and 3 LOC, respectively). Moreover, 98.6\% of the analyzed resolutions preserve the order of the lines in the conflicting chunks. We also found that 77.4\% of the chunk resolutions do not interleave lines from different contributions more than once. These findings altogether, when used as heuristics for automatic merge resolution, could enable the reduction of the search space by 94.7\%, paving the road for future search-based software engineering tools for this problem.},
booktitle = {Proceedings of the XXXVI Brazilian Symposium on Software Engineering},
pages = {425--434},
numpages = {10},
keywords = {search-based software engineering, conflict resolution, Version control systems, software merge},
location = {Virtual Event, Brazil},
series = {SBES '22}
}


@inproceedings{10.1109/ICSE-C.2017.103,
author = {Cavalcanti, Guilherme and Borba, Paulo and Accioly, Paola},
title = {Should We Replace Our Merge Tools?},
year = {2017},
isbn = {9781538615898},
publisher = {IEEE Press},
url = {https://doi.org/10.1109/ICSE-C.2017.103},
doi = {10.1109/ICSE-C.2017.103},
abstract = {While unstructured merge tools try to automatically resolve merge conflicts via textual similarity, semistructured merge tools try to go further by partially exploiting the syntactic structure and semantics of the involved artefacts. Previous studies compare these merge approaches with respect to the number of reported conflicts, showing, for most projects and merge situations, a reduction in favor of semistructured merge. However, these studies do not investigate whether this reduction actually leads to integration effort reduction (Productivity) without negative impact on the correctness of the merging process (Quality). To analyze this, and to better understand how these tools could be improved, we propose empirical studies to identify spurious conflicts reported by one approach but not by the other, and interference reported as conflict by one approach but missed by the other.},
booktitle = {Proceedings of the 39th International Conference on Software Engineering Companion},
pages = {325--327},
numpages = {3},
keywords = {empirical studies, version control systems, collaborative development, software merging},
location = {Buenos Aires, Argentina},
series = {ICSE-C '17}
}


@Article{ZhuH2018,
  author = 	 "Zhu, Fengmin and He, Fei",
  title = 	 "Conflict Resolution for Structured Merge via Version Space Algebra",
  journal = 	 PACMPL,
  year = 	 2018,
  volume = 	 2,
  number = 	 "OOPSLA",
  month = 	 oct,
  articleno = 166,
  numpages = 25,
  abstract =
   "Resolving conflicts is the main challenge for software merging. The
    existing merge tools usually rely on the developer to manually resolve
    conflicts. This is of course inefficient. We propose an interactive
    approach for resolving merge conflicts. To the best of our knowledge, this
    is the first attempt for conflict resolution of structured merge. To
    represent the possibly very large set of candidate programs, we propose an
    expressive and efficient representation by version space algebra. We also
    design a simple mechanism for ranking resolutions in the program space,
    such that the top-ranked resolution is very likely to meet the developer's
    expectation.  We prototype our approach as a merge tool AutoMerge, and
    evaluate it on 244 real-world conflicts arising from 10 open-source
    projects. Results show great practicality of our approach.",
}


@Article{KhleelN2020,
  author = 	 "Khleel, Nasraldeen Alnor Adam and Neh{\'e}z, K{\'a}roly",
  authorASCII =  "Khleel, Nasraldeen Alnor Adam and Nehez, Karoly",
  title = 	 "Merging problems in modern version control systems",
  journal = 	 "Multidiszciplin{\'a}ris Tudom{\'a}nyok",
  year = 	 2020,
  volume = 	 10,
  number = 	 3,
  pages = 	 "365--376",
  doi = 	 "10.35925/ji.multi.2020.3.44",
}

@InProceedings{Cavalcanti2017,
  author = 	 "Cavalcanti, Guilherme",
  title = 	 "What merge tool should I use?",
  crossref =  "SPLASH2017Companion",
  pages = 	 "19--20",
}


@InProceedings{RaoS2016,
  author    = {Rao, N. Rama and Sekharaiah, K. Chandra},
  title     = {A Methodological Review Based Version Control System with Evolutionary Research for Software Processes},
  articleno = {14},
  numpages  = {6},
  crossref  = {ICTCS2016},
}


@InProceedings{SwierstraL2014,
  author = 	 "Swierstra, Wouter and L{\"o}h, Andres",
  authorASCII = 	 "Swierstra, Wouter and Loh, Andres",
  title = 	 "The Semantics of Version Control",
  crossref =  "Onward2014",
  pages = 	 "43--54",
}


@InProceedings{MehdiUC2014,
  author = 	 "Mehdi, Ahmed-Nacer and Urso, Pascal and Charoy, Fran{\c{c}}ois",
  authorASCII =	 "Mehdi, Ahmed-Nacer and Urso, Pascal and Charoy, Francois",
  title = 	 "Evaluating Software Merge Quality",
  crossref =  "EASE2014",
  articleno = {9},
  numpages = {10},
}


@Misc{git-hires-merge,
  title        = {git-hires-merge},
  author       = {Paul Altin},
  note         = {Accessed 2023-07-31},
  howpublished = {\url{https://github.com/paulaltin/git-hires-merge}},
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Tangled commits and untangling/clustering algorithms
%%%


@InProceedings{KirinukiHHK2016,
  author = 	 "Kirinuki, Hiroyuki and Higo, Yoshiki and Hotta, Keisuke and Kusumoto, Shinji",
  title = 	 "Splitting commits via past code changes",
  crossref =  "APSEC2016",
  pages = 	 "129--136",
}


@InProceedings{DashAB2018,
  author = 	 "Dash, Santanu Kumar and Allamanis, Miltiadis and Barr, Earl T.",
  title = 	 "{RefiNym}: Using Names to Refine Types",
  crossref =  "FSE2018",
  pages = 	 "107--117",
}


@InProceedings{PartachiDAB2020,
  author = 	 "P{\^a}rtachi, Profir-Petru and Dash, Santanu Kumar and Allamanis, Miltiadis and Barr, Earl T.",
  authorASCII =  "Partachi, Profir-Petru and Dash, Santanu Kumar and Allamanis, Miltiadis and Barr, Earl T.",
  title = 	 "Flexeme: Untangling Commits Using Lexical Flows",
  crossref =  "FSE2020",
  pages = 	 "63--74",
}


@InProceedings{ShenZKZWLJ2021,
  author = 	 "Shen, Bo and Zhang, Wei and K{\"a}stner, Christian and Zhao, Haiyan and Wei, Zhao and Liang, Guangtai and Jin, Zhi",
  authorASCII =  "Shen, Bo and Zhang, Wei and Kastner, Christian and Zhao, Haiyan and Wei, Zhao and Liang, Guangtai and Jin, Zhi",
  title = 	 "{SmartCommit}: A graph-based interactive assistant for activity-oriented commits",
  crossref =  "FSE2021",
  pages = 	 "379--390",
}


@Article{JiangLLZCNZHBZ2022,
  author = 	 "Jiang, Yanjie and Liu, Hui and Luo, Xiaoqing and Zhu, Zhihao and Chi, Xiaye and Niu, Nan and Zhang, Yuxia and Hu, Yamin and Bian, Pan and Zhang, Lu",
  title = 	 "{BugBuilder}: An Automated Approach to Building Bug Repository",
  journal = 	 TSE,
  year = 	 2022,
  NEEDvolume = 	 "*",
  NEEDnumber = 	 "*",
  NEEDpages = 	 "*",
  NEEDmonth = 	 "*",
}


@InProceedings{ChenXYX2022,
  author = 	 "Chen, Siyu and Xu, Shengbin and Yao, Yuan and Xu, Feng",
  title = 	 "Untangling composite commits by attributed graph clustering",
  crossref =  "Internetware2022",
  pages = 	 "117--126",
}


@InProceedings{WangLZX2019,
  author = 	 "Wang, Min and Lin, Zeqi and Zou, Yanzhen and Xie, Bing",
  title = 	 "{CoRA}: Decomposing and describing tangled code changes for reviewer",
  crossref =  "ASE2019",
  pages = 	 "1050--1061",
  abstract =
   "Code review is an important mechanism for code quality assurance both in
    open source software and industrial software. Reviewers usually suffer from
    numerous, tangled and loosely related code changes that are bundled in a
    single commit, which makes code review very difficult. In this paper, we
    propose CoRA (\underline{Co}de \underline{R}eview \underline{A}ssistant),
    an automatic
    approach to decompose a commit into different parts and generate concise
    descriptions for reviewers. More specifically, CoRA can decompose a commit
    into independent parts (e.g., bug fixing, new feature adding, or
    refactoring) by code dependency analysis and tree-based similar-code
    detection, then identify the most important code changes in each part based
    on the PageRank algorithm and heuristic rules. As a result, CoRA can
    generate a concise description for each part of the commit. We evaluate our
    approach in seven open source software projects and 50 code commits. The
    results indicate that CoRA can improve the accuracy of decomposing code
    changes by 6.3\% over the state-of-art practice. At the same time, CoRA can
    identify the important part from the fine-grained code changes with a mean
    average precision (MAP) of 87.7\%. We also conduct a human study with eight
    participants to evaluate the performance and usefulness of CoRA, the user
    feedback indicates that CoRA can effectively help reviewers.",
}


@InProceedings{BarnettBBL2015,
  author = 	 "Barnett, Mike and Bird, Christian and Brunet, Jo{\~a}o and Lahiri, Shuvendu K.",
  authorASCII =  "Barnett, Mike and Bird, Christian and Brunet, Joao and Lahiri, Shuvendu K.",
  authorUTF8 = 	 "Barnett, Mike and Bird, Christian and Brunet, João and Lahiri, Shuvendu K.",
  title = 	 "Helping developers help themselves: Automatic decomposition of code review changesets",
  crossref =  "ICSE2015",
  pages = 	 "134--144",
}


@InProceedings{LiWN2022,
  author = 	 "Li, Yi and Wang, Shaohua and Nguyen, Tien N.",
  title = 	 "{UTango}: Untangling commits with context-aware, graph-based, code change clustering learning model",
  crossref =  "FSE2022",
  pages = 	 "221--232",
  abstract =
   "During software evolution, developers make several changes and commit them
    into the repositories. Unfortunately, many of them tangle different
    purposes, both hampering program comprehension and reducing separation of
    concerns. Automated approaches with deterministic solutions have been
    proposed to untangle commits. However, specifying an effective clustering
    criteria on the changes in a commit for untangling is challenging for those
    approaches. In this work, we present UTango, a machine learning (ML)-based
    approach that learns to untangle the changes in a commit. We develop a
    novel code change clustering learning model that learns to cluster the code
    changes, represented by the embeddings, into different groups with
    different concerns. We adapt the agglomerative clustering algorithm into a
    supervised-learning clustering model operating on the learned code change
    embeddings via trainable parameters and a loss function in comparing the
    predicted clusters and the correct ones during training. To facilitate our
    clustering learning model, we develop a context-aware, graph-based, code
    change representation learning model, leveraging Label, Graph-based
    Convolution Network to produce the contextualized embeddings for code
    changes, that integrates program dependencies and the surrounding contexts
    of the changes. The contexts and cloned code are also explicitly
    represented, helping UTango distinguish the concerns. Our empirical
    evaluation on C\# and Java datasets with 1,612 and 14k tangled commits show
    that it achieves the accuracy of 28.6\%--462.5\% and 13.3\%--100.0\%
    relatively higher than the state-of-the-art commit-untangling approaches
    for C\# and Java, respectively.",
}


@Article{HerzigJZ2016,
  author = 	 "Herzig, Kim and Just, Sascha and Zeller, Andreas",
  title = 	 "The impact of tangled code changes on defect prediction models",
  journal = 	 JEmpiricalSE,
  year = 	 2016,
  volume = 	 21,
  number = 	 2,
  pages = 	 "303--336",
  abstract =
   "When interacting with source control management system, developers often
    commit unrelated or loosely related code changes in a single
    transaction. When analyzing version histories, such tangled changes will
    make all changes to all modules appear related, possibly compromising the
    resulting analyses through noise and bias. In an investigation of five
    open-source JAVA projects, we found between 7\% and 20\% of all bug fixes
    to consist of multiple tangled changes. Using a multi-predictor approach to
    untangle changes, we show that on average at least 16.6\% of all source
    files are incorrectly associated with bug reports. These incorrect bug file
    associations seem to not significantly impact models classifying source
    files to have at least one bug or no bugs. But our experiments show that
    untangling tangled code changes can result in more accurate regression bug
    prediction models when compared to models trained and tested on tangled bug
    datasets---in our experiments, the statistically significant accuracy
    improvements lies between 5\% and 200\%. We recommend better change
    organization to limit the impact of tangled changes.",
}


@article{rand1971objective,
  author        = {Rand, William M.},
  title         = {Objective criteria for the evaluation of clustering methods},
  journal       = {Journal of the American Statistical Association},
  year          = {1971},
  volume        = {66},
  number        = {336},
  pages         = {846--850},
  OMITpublisher = {Taylor \& Francis},
}


@InProceedings{TaoK2015,
  author   = {Tao, Yida and Kim, Sunghun},
  title    = {Partitioning composite code changes to facilitate code review},
  pages    = {180--190},
  crossref = {MSR2015},
}


@InProceedings{KreutzerDREP2016,
  author   = {Kreutzer, Patrick and Dotzler, Georg and Ring, Matthias and Eskofier, Bjoern M. and Philippsen, Michael},
  title    = {Automatic clustering of code changes},
  pages    = {61--72},
  crossref = {MSR2016},
}


@InProceedings{DiasBGCD2015,
  author   = {Dias, Mart{\'\i}n and Bacchelli, Alberto and Gousios, Georgios and Cassou, Damien and Ducasse, St{\'e}phane},
  title    = {Untangling fine-grained code changes},
  pages    = {341--350},
  crossref = {SANER2015},
}


@InProceedings{NguyenNN2013,
  author   = {Nguyen, Hoan Anh and Nguyen, Anh Tuan and Nguyen, Tien N.},
  title    = {Filtering noise in mixed-purpose fixing commits to improve defect prediction and localization},
  pages    = {138--147},
  crossref = {ISSRE2013},
}


@InProceedings{SothornprapakornHS2018,
  author   = {Sothornprapakorn, Sarocha and Hayashi, Shinpei and Saeki, Motoshi},
  title    = {Visualizing a tangled change for supporting its decomposition and commit construction},
  pages    = {74--79},
  crossref = {COMPSAC2018},
}


@article{guo2019decomposing,
  title={Decomposing composite changes for code review and regression test selection in evolving software},
  author={Guo, Bo and Kwon, Young-Woo and Song, Myoungkyu},
  journal={Journal of Computer Science and Technology},
  volume={34},
  number={2},
  pages={416--436},
  year={2019},
  publisher={Springer}
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Understandability
%%%

@InProceedings{TaoDXZK2012,
  author   = {Tao, Yida and Dang, Yingnong and Xie, Tao and Zhang, Dongmei and Kim, Sunghun},
  title    = {How do software engineers understand code changes? {An} exploratory study in industry},
  pages    = {1--11},
  crossref = {FSE2012},
}

@InProceedings{RamSCB2018,
  author   = {Ram, Achyudh and Sawant, Anand Ashok and Castelluccio, Marco and Bacchelli, Alberto},
  title    = {What makes a code change easier to review: an empirical investigation on code change reviewability},
  pages    = {201--212},
  crossref = {FSE2018},
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Datasets
%%%


@InProceedings{Gousios2013,
  author =       "Georgios Gousios",
  title =        "The {GHTorrent} dataset and tool suite",
  crossref =     "MSR2013",
  pages =        "233--236",
  doi =          "10.1109/MSR.2013.6624034",
}


@Article{MunaiahKCN2017,
  author =       "Munaiah, Nuthan and Kroh, Steven and Cabrey, Craig and Nagappan, Meiyappan",
  title =        "Curating {GitHub} for Engineered Software Projects",
  journal =      JEmpiricalSE,
  year =         2017,
  volume =       22,
  number =       6,
  pages =        "3219--3253",
  month =        dec,
  abstract =
   "Software forges like GitHub host millions of repositories. Software
    engineering researchers have been able to take advantage of such a large
    corpora of potential study subjects with the help of tools like GHTorrent
    and Boa. However, the simplicity in querying comes with a caveat: there are
    limited means of separating the signal (e.g. repositories containing
    engineered software projects) from the noise (e.g. repositories containing
    home work assignments). The proportion of noise in a random sample of
    repositories could skew the study and may lead to researchers reaching
    unrealistic, potentially inaccurate, conclusions. We argue that it is
    imperative to have the ability to sieve out the noise in such large
    repository forges.
    \par
    We propose a framework, and present a reference implementation of the
    framework as a tool called reaper, to enable researchers to select GitHub
    repositories that contain evidence of an engineered software project. We
    identify software engineering practices (called dimensions) and propose
    means for validating their existence in a GitHub repository. We used reaper
    to measure the dimensions of 1,857,423 GitHub repositories. We then used
    manually classified data sets of repositories to train classifiers capable
    of predicting if a given GitHub repository contains an engineered software
    project. The performance of the classifiers was evaluated using a set of
    200 repositories with known ground truth classification. We also compared
    the performance of the classifiers to other approaches to classification
    (e.g. number of GitHub Stargazers) and found our classifiers to outperform
    existing approaches. We found stargazers-based classifier (with 10 as the
    threshold for number of stargazers) to exhibit high precision (97\%) but an
    inversely proportional recall (32\%). On the other hand, our best
    classifier exhibited a high precision (82\%) and a high recall (86\%). The
    stargazer-based criteria offers precision but fails to recall a significant
    portion of the population.",
}


@inproceedings{GZ2014,
  author    = "Gousios, Georgios and Zaidman, Andy",
  title     = "A Dataset for Pull-based Development Research",
  booktitle = "Proceedings of the 11th Working Conference on Mining Software Repositories",
  series    = "MSR 2014",
  location  = "Hyderabad, India",
  year      = 2014,
  pages     = "368--371",
  numpages  = 4,
  publisher = "ACM",
  address   = "New York, NY, USA",
  isbn      = "978-1-4503-2863-0",
  doi       = "10.1145/2597073.2597122",
  acmid     = 2597122,
  keywords  = "distributed software development, empirical software engineering, pull request, pull-based development",
}


@inproceedings{10.1145/2597073.2597124,
  author = {Passos, Leonardo and Czarnecki, Krzysztof},
  title = {A Dataset of Feature Additions and Feature Removals from the {Linux} Kernel},
  year = {2014},
  isbn = {9781450328630},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/2597073.2597124},
  doi = {10.1145/2597073.2597124},
  abstract = {This paper describes a dataset of feature additions and removals in the Linux kernel evolution history, spanning over seven years of kernel development. Features, in this context, denote configurable system options that users select when creating customized kernel images. The provided dataset is the largest corpus we are aware of capturing feature additions and removals, allowing researchers to assess the kernel evolution from a feature-oriented point-of-view. Furthermore, the dataset can be used to better understand how features evolve over time, and how different artifacts change as a result. One particular use of the dataset is to provide a real-world case to assess existing support for feature traceability and evolution. In this paper, we detail the dataset extraction process, the underlying database schema, and example queries. The dataset is directly available at our Bitbucket repository: https://bitbucket.org/lpassos/kconfigdb},
  booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories},
  pages = {376--379},
  numpages = {4},
  keywords = {Linux, Evolution, Version Control History, Traceability},
  location = {Hyderabad, India},
  series = {MSR 2014}
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Refactoring
%%%


@InProceedings{DigNMJ2006,
  author =       "Dig, Danny and Nguyen, Tien N. and Manzoor, Kashif and Johnson, Ralph",
  title =        "{MolhadoRef}: A Refactoring-Aware Software Configuration Management Tool",
  crossref =     "OOPSLACompanion2006",
  year =         2006,
  pages =        "732--733",
  abstract =     "Refactoring tools allow programmers to change their source code quicker than before. However, the complexity of these changes cause versioning tools that operate at a file level to lose the history of entities and be unable to merge refactored entities. This problem can be solved by semantic, operation-based SCM with persistent IDs. We propose that versioning tools be aware of program entities and refactoring operations. We present MolhadoRef, our prototype, which uses these techniques to ensure that it never loses history. MolhadoRef can successfully merge edit and refactoring operations which were performed on different development branches.",
}


@InProceedings{Freese2006,
  author =       "Freese, Tammo",
  title =        "Refactoring-Aware Version Control",
  crossref =     "ICSE2006",
  pages =        "953--956",
}


@InProceedings{ShenZZLJW2019,
  author =       "Shen, Bo and Zhang, Wei and Zhao, Haiyan and Liang, Guangtai and Jin, Zhi and Wang, Qianxiang",
  title =        "{IntelliMerge}: A refactoring-aware software merging technique",
  crossref =     "OOPSLA2019",
  pages =        "170:1--170:28",
}


@MastersThesis{Ellis2022,
  author =       "Max Ellis",
  title =        "A Systematic Comparison of Two Refactoring-aware Merging Techniques",
  school =       "University of Alberta",
  year =         2022,
  address =      "Edmonton, Alberta, Canada",
}


@Article{EllisND2023,
  author =       "Ellis, Max and Nadi, Sarah and Dig, Danny",
  title =        "Operation-Based Refactoring-Aware Merging: An Empirical Evaluation",
  journal =      IEEETSE,
  year =         2023,
  volume =       49,
  number =       4,
  pages =        "2698--2721",
  month =        apr,
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Other
%%%

@Article{Rochkind75,
  author =       "Marc J. Rochkind",
  title =        "The Source Code Control System",
  journal =      IEEETSE,
  year =         1975,
  volume =       1,
  number =       4,
  pages =        "364--370",
  month =        dec,
}


@InProceedings{BallKPS97,
  author    = {Thomas Ball and Jung-Min Kim and Adam A. Porter and Harvey P. Siy},
  title     = {If Your Version Control System Could Talk \ldots},
  NEEDpages = {*},
  crossref  = {PMESSE97},
}


@InProceedings{EstlerNFM2014,
  author =       "H.-Christian Estler and Martin Nordio and Carlo A. Furia and Bertrand Meyer",
  title =        "Awareness and merge conflicts in distributed software development",
  booktitle =    "2014 {IEEE} 9th International Conference on Global Software Engineering",
  year =         2014,
  pages =        "26--35",
  month =        aug,
  NEEDaddress =  "*",
}


@InProceedings{MusluBNC2014,
  author      = {K{\i}van{\c{c}} Mu{\c{s}}lu and Christian Bird and Nachi Nagappan and Jacek Czerwonka},
  authorASCII = {Kivanc Muslu and Christian Bird and Nachi Nagappan and Jacek Czerwonka},
  title       = {Transition from centralized to distributed version control systems: A case study on reasons, barriers, and outcomes},
  pages       = {334--344},
  crossref    = {ICSE2014},
}


@InProceedings{TianZSJL2022,
  author    = {Yingchen Tian and Yuxia Zhang and Klaas-Jan Stol and Lin Jiang and Hui Liu},
  title     = {What Makes a Good Commit Message?},
  NEEDpages = {*},
  crossref  = {ICSE2022},
}


@inproceedings{10.1145/1294948.1294975,
  author = {De Lucia, Andrea and Fasano, Fausto and Oliveto, Rocco and Santonicola, Domenico},
  title = {Improving Context Awareness in {Subversion} through Fine-Grained Versioning of {Java} Code},
  year = {2007},
  isbn = {9781595937223},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/1294948.1294975},
  doi = {10.1145/1294948.1294975},
  abstract = {In this paper, we present an extension of the Subversion command line to support fine-grained versioning of Java code. To this aim, for each Java file under versioning, an XML-based file representing the logical structure of the original file is automatically built by parsing the code. An additional XML-based file is also built to model collaboration constraints. This information is useful to enrich the context awareness by providing developers information about changes made by others to the same logical unit (i.e., class, method, or attribute) of the Java file. Finally, we present an extension of Subclipse, a Subversion front-end implemented as an Eclipse plug-in, aiming to support the fine-grained versioning in Subversion.},
  booktitle = {Ninth International Workshop on Principles of Software Evolution: In Conjunction with the 6th ESEC/FSE Joint Meeting},
  pages = {110--113},
  numpages = {4},
  keywords = {version control, integrated development environment},
  location = {Dubrovnik, Croatia},
  series = {IWPSE '07}
}


@inproceedings{10.5555/2662737.2662745,
  author = {Foucault, Matthieu and Barbier, S{\'e}bastien and Lugato, David},
  title = {Enhancing Version Control with Domain-Specific Semantics},
  year = {2013},
  isbn = {9781467364478},
  publisher = {IEEE Press},
  abstract = {As Domain-Specific Modeling Languages (DSML) become more widespread, it is now possible for non-computer scientists to design complex systems. Not only do DSMLs make modeling more accessible to domain experts, they also improve their efficiency. Using such models, the problem of their maintenance and evolution arises, with common software engineering issues, such as reversibility and traceability of developments. In this article we present the integration of a collaborative edition process into a model-driven engineering (MDE) simulation platform defining its own DSML, which includes a serialization procedure. Our novel approach defines a collaborative work and version control process. This process uses the concrete textual syntax defined by this DSML, which allows us to use the features of existing Version Control Systems (VCS) to work with serialized models, and helps calculate the differences between two versions of a model, merge versions of these models, and detect merge conflicts.},
  booktitle = {Proceedings of the 5th International Workshop on Modeling in Software Engineering},
  pages = {31--36},
  numpages = {6},
  location = {San Francisco, California},
  series = {MiSE '13}
}


@inproceedings{10.1145/1012807.1012859,
  author = {Nguyen, Tien N. and Munson, Ethan V. and Boyland, John T.},
  title = {The {Molhado} Hypertext Versioning System},
  year = {2004},
  isbn = {1581138482},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/1012807.1012859},
  doi = {10.1145/1012807.1012859},
  abstract = {This paper describes Molhado, a hypertext versioning and software configuration management system that is distinguished from previous systems by its flexible product versioning and structural configuration management model. The model enables a unified versioning framework for atomic and composite software artifacts, and hypermedia structures among them in a fine-grained manner at the logical level. Hypermedia structures are managed separately from documents' contents. Molhado explicitly represents hyperlinks, allowing them to be browsed, visualized, and systematically analyzed. Molhado not only versions complex hypermedia structures (e.g., multi links), but also supports versioning of individual hyperlinks. This paper focuses on Molhado's hypertext versioning and its use in the Software Concordance environment to manage the evolution of a software project and hypermedia structures.},
  booktitle = {Proceedings of the Fifteenth ACM Conference on Hypertext and Hypermedia},
  pages = {185--194},
  numpages = {10},
  keywords = {hypertext versioning, version control, software engineering, software configuration management},
  location = {Santa Cruz, CA, USA},
  series = {HYPERTEXT '04}
}


@inproceedings{10.1145/1860559.1860621,
  author = {M{\"u}ller, Arthur and R{\"o}nnau, Sebastian and Borghoff, Uwe M.},
  title = {A File-Type Sensitive, Auto-Versioning File System},
  year = {2010},
  isbn = {9781450302319},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/1860559.1860621},
  doi = {10.1145/1860559.1860621},
  abstract = {Auto-versioning file systems offer a simple and reliable interface to document change control. The implicit versioning of documents at each write access catches the whole evolution of a document, thus supporting regulatory compliance rules. Most existing file systems work on low abstraction levels and track the document evolution on their binary representation. Higher-level differencing tools allow for a far more meaningful change-tracking, though. In this paper, we present an auto-versioning file system that is able to handle files depending on their file type. This way, a suitable differencing tool can be assigned to each file type. Our approach supports regulatory compliant storage as well as the archiving of documents.},
  booktitle = {Proceedings of the 10th ACM Symposium on Document Engineering},
  pages = {271--274},
  numpages = {4},
  keywords = {file system, regulatory compliance, version control, auto-versioning, document management},
  location = {Manchester, United Kingdom},
  series = {DocEng '10}
}


@inproceedings{10.1145/2034691.2034713,
  author = {Thao, Cheng and Munson, Ethan V.},
  title = {Version-Aware {XML} Documents},
  year = {2011},
  isbn = {9781450308632},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/2034691.2034713},
  doi = {10.1145/2034691.2034713},
  abstract = {A document often goes through many revisions before it is finalized. In the normal document creation process, newer revisions overwrite older ones and only the final revision is kept. At any stage of document creation, it might be desirable to see how the document came to its current form or to revert back to a previous revision. Conventional version control tools such as CVS could help authors do exactly this. However, these tools are unlikely to be adopted by non-technical document authors due to the overhead of managing a repository and the tools' learning curves. This paper presents an approach called version-aware documents that embeds versioning data within the document thus making version control for single documents a seamless part of the authoring process.},
  booktitle = {Proceedings of the 11th ACM Symposium on Document Engineering},
  pages = {97--100},
  numpages = {4},
  keywords = {xml document, collaborative editing, version control},
  location = {Mountain View, California, USA},
  series = {DocEng '11}
}


@inproceedings{10.1145/2889160.2889262,
  author = {Linsbauer, Lukas and Egyed, Alexander and Lopez-Herrejon, Roberto Erick},
  title = {A Variability Aware Configuration Management and Revision Control Platform},
  year = {2016},
  isbn = {9781450342056},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/2889160.2889262},
  doi = {10.1145/2889160.2889262},
  abstract = {Modern systems need to run in many different contexts like hardware and software platforms or environmental conditions. Additionally different customers might have slightly different requirements towards systems. Therefore software systems need to be highly configurable and provide variable sets of features for different customers. There are various approaches to developing and managing such systems, like ad-hoc clone-and-own approaches or structured software product line approaches for each of which again several different techniques and tools exist to support them. While the different approaches come with advantages they also have several disadvantages and shortcomings. Some work only with specific implementation artifacts (e.g. source code but not models) and others exist only as plugins for specific IDEs which makes them intrusive or even unusable in some development environments. In our work we present a development process and tools for managing and engineering of highly configurable and variable systems that is generic, incremental, flexible and intuitive. We evaluated our approach on several case study systems from various different domains and origins like open source, academia or industry. The results so far showed promising results.},
  booktitle = {Proceedings of the 38th International Conference on Software Engineering Companion},
  pages = {803--806},
  numpages = {4},
  keywords = {variants, features, configuration, versioning, variability},
  location = {Austin, Texas},
  series = {ICSE '16}
}


@inproceedings{10.1145/1456536.1456576,
  author = {Junqueira, Daniel C. and Bittar, Thiago J. and Fortes, Renata P. M.},
  title = {A Fine-Grained and Flexible Version Control for Software Artifacts},
  year = {2008},
  isbn = {9781605580838},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/1456536.1456576},
  doi = {10.1145/1456536.1456576},
  abstract = {Version control is an activity very important for high-quality software production. The structure used by version control systems is the same used by file systems, but in general the abstraction level made by software developers considers the file contents and its internal structure, including details as classes, methods, control blocks and others. Fine-grained version control tools can provide a more detailed version control. However traditional tools and models provide very low flexibility and present high cost and impact of deployment in software development environments. In this paper, there are presented a model and a tool which aim at providing support to fine-grained version control activities.},
  booktitle = {Proceedings of the 26th Annual ACM International Conference on Design of Communication},
  pages = {185--192},
  numpages = {8},
  keywords = {version control, software configuration management},
  location = {Lisbon, Portugal},
  series = {SIGDOC '08}
}


@InProceedings{BaAS2013,
  author =       "Ba, M. Lamine and Abdessalem, Talel and Senellart, Pierre",
  title =        "Uncertain Version Control in Open Collaborative Editing of Tree-Structured Documents",
  crossref =     "DocEng2013",
  pages =        "27--36",
}


@article{10.1145/197320.197403,
  author = {Berzins, Valdis},
  title = {Software Merge: Semantics of Combining Changes to Programs},
  year = {1994},
  issue_date = {Nov. 1994},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  volume = {16},
  number = {6},
  issn = {0164-0925},
  url = {https://doi.org/10.1145/197320.197403},
  doi = {10.1145/197320.197403},
  abstract = {We present a language-independent semantic model of the process of combining changes to programs. This model extends the domains used in denotational semantics (complete partial orders) to Boolean algebras, and represents incompatible modifications as well as compatible extensions. The model is used to define the intended semantics of change-merging operations on programs and to establish some general properties of software merging. We determine conditions under which changes to subprograms of a software system can be merged independently and illustrate cases where this is not possible.},
  journal = {ACM Trans. Program. Lang. Syst.},
  month = nov,
  pages = {1875--1903},
  numpages = {29},
  keywords = {software change merging, domains, software maintenance, semantics}
}


@Article{HerboldHTLAGCBNMASSMHLWRPCPVSQCDWWMAAATEWMSFHKTTSPWDSAASE2022,
  author =       "Herbold, Steffen and Trautsch, Alexander and Ledel, Benjamin and Aghamohammadi, Alireza and Ghaleb, Taher A. and Chahal, Kuljit Kaur and Bossenmaier, Tim and Nagaria, Bhaveet and Makedonski, Philip and Ahmadabadi, Matin Nili and Szabados, Kristof and Spieker, Helge and Madeja, Matej and Hoy, Nathaniel and Lenarduzzi, Valentina and Wang, Shangwen and Rodr{\'\i}guez-P{\'e}rez, Gema and Colomo-Palacios, Ricardo and Verdecchia, Roberto and Singh, Paramvir and Qin, Yihao and Chakroborti, Debasish and Davis, Willard and Walunj, Vijay and Wu, Hongjun and Marcilio, Diego and Alam, Omar and Aldaeej, Abdullah and Amit, Idan and Turhan, Burak and Eismann, Simon and Wickert, Anna-Katharina and Malavolta, Ivano and Sul{\'\i}r, Mat{\'u}{\v{s}} and Fard, Fatemeh and Henley, Austin Z. and Kourtzanidis, Stratos and Tuzun, Eray and Treude, Christoph and Shamasbi, Simin Maleki and Pashchenko, Ivan and Wyrich, Marvin and Davis, James and Serebrenik, Alexander and Albrecht, Ella and Aktas, Ethem Utku and Str{\"u}ber, Daniel and Erbel, Johannes",
  title =        "A fine-grained data set and analysis of tangling in bug fixing commits",
  journal =      JEmpiricalSE,
  year =         2022,
  volume =       27,
  number =       125,
  month =        jul,
}


@Misc{stack-overflow-developer-survey:2022,
  author       = {{Stack Overflow}},
  title        = {2022 developer survey},
  howpublished = {\url{https://survey.stackoverflow.co/2022}},
  year         = {2022},
  month        = may,
}


@InProceedings{KawrykowR2011,
  author =       "Kawrykow, David and Robillard, Martin P.",
  title =        "Non-essential changes in version histories",
  crossref =     "ICSE2011",
  pages =        "351--360",
}


% LocalWords:  Kasi Sarma KQ177 BERZINS POSTGRAD V550 P229 ISSN Apel
% LocalWords:  Subfile SciSearch InProceedings ApelLBLK2011 rg Liebig
% LocalWords:  Brandl Lengauer stner authorASCII Jorg Kastner FSE2011
% LocalWords:  Semistructured crossref semistructured FSTMerge Borba
% LocalWords:  FSTGENERATOR TrindadeTavaresBCS2019 Trindade Tavares
% LocalWords:  Cavalcanti Guilherme Soares Sérgio ase2019 jsFSTMerge
% LocalWords:  Estler Nordio Furia De Rosso untracking Gitless async
% LocalWords:  Untracked Gitless's TechReport Binkley91 Binkley diff3
% LocalWords:  UWMadison HorwitzRB90 HorwitzPR88a Horwitz Prins jul
% LocalWords:  POPL88 HorwitzPR89 toplas HorwitzR90 DeepMerge Dinella
% LocalWords:  Mytkowicz Svyatkovskiy Mayur Naik Shuvendu Lahiri Neel
% LocalWords:  DeepMerge's MergeBERT Mytcowicz Negar BugBuilder ba Bo
% LocalWords:  Ghorbani Fakhoury Sundaresan Shen Zhao Haiyan Liang P5
% LocalWords:  Guangtai Jin Zhi Qianxiang IntelliMerge url doi Proc
% LocalWords:  oct articleno numpages Gleiph Ghiotto Murta Márcio der
% LocalWords:  André Hoek Mens Siy Votta biburl ee interhash Softw B1
% LocalWords:  ec6330519d62f394ded0f10d2d00cbd0 intrahash Methodol M0
% LocalWords:  dc01e62b9b4e35e01890251f3de81e1d dblp PerrySV01 enich
% LocalWords:  Mens2002 PerrySV2001 ApelLL2012 authorUTF Leßenich Ulf
% LocalWords:  Lessenich ICSE2012 Dillig Isil SafeMerge Bernhard SHA
% LocalWords:  Westfechtel Asklund NWPER Ignat IWCES IkiWiki Tichy lu
% LocalWords:  overbroad ICSM variant1 Kästner inproceedings isbn SCM
% LocalWords:  booktitle fstmerge featurehouse Autom Accioly Taweesup
% LocalWords:  ApiwattanapongOH2004 Apiwattanapong Alessandro Orso n1
% LocalWords:  Harrold ASE2004 JDIFF supersededby ASEjournal JDiff n2
% LocalWords:  ApiwattanapongOH2007 usesDaikonAsTestSubject JABA VCSs
% LocalWords:  Cdiff Differencer bibsource Lucent Springer Verlag dec
% LocalWords:  Magnusson ECOOP'98 BallKPS97 PMESSE97 NEEDpages SCCS
% LocalWords:  Rochkind75 Rochkind IEEETSE MusluBNC2014 Nachi Jacek
% LocalWords:  Nagappan Czerwonka ICSE2014 HuntT2002 Nadi Kashif Nho
% LocalWords:  Tien Dimitar Asenov Balz Guenat uller Otth FASE eg JDT
% LocalWords:  GumTree parentId IntegerLiteral oldNode newNode RxJava
% LocalWords:  XHTML DaisyDiff OOPSLA2019 Mahmoudi PEGs subtrees jGit
% LocalWords:  Gumtree LessenichAL2014 JDime CavalcantiBA2017 Paola V1
% LocalWords:  OOPSLA2017 GitMiner Zimmermann Ellis2022 MolhadoRef V2
% LocalWords:  RefMerge refactoringaware arXiv RQ1 RQ2 LOC DigMNJ2006
% LocalWords:  Manzoor DigMJN2007 Molhado subgraphs node1 node2 Ladd
% LocalWords:  JacksonL1994 ICSM94 invar outvar intra JiangLNZH2021
% LocalWords:  JiangLLZCNZHBZ2022 Yanjie Jiang Liu Niu Yamin Hu Luo
% LocalWords:  RefactoringMiner RMiner tokenwise Xiaoqing Zhu Zhihao
% LocalWords:  Xiaye Yuxia Bian ieeetse NEEDvolume NEEDnumber TSE m1
% LocalWords:  NEEDmonth preprint BugBuilder's minimizations DiffCat
% LocalWords:  GrowingBugs fallthrough DashAB2018 Santanu Kumar Guo's
% LocalWords:  Allamanis Miltiadis RefiNym fse2018 OCallahanJ97 m3 un
% LocalWords:  GuoPME2006 behaviour Laski Szermer CFGs ECFGs ECFG DFS
% LocalWords:  Interclass Szermer's FalleriMBMM2014 Falleri Morandat
% LocalWords:  Flor Blanc Matias Monperrus Remy Floreal ASE2014 expr
% LocalWords:  updateValue addNode deleteNode moveNode matchings str
% LocalWords:  mapping's Autochrome Clojure McQueeney pathfinding csv
% LocalWords:  subtree Kawrykow Robillard PartachiDAB2020 identiier's
% LocalWords:  localisation reifies bimodal AssignE strlen RefiNym's
% LocalWords:  rewriter const representativeness monotype UDTs UDT sc
% LocalWords:  ConfigurationItemFactory LogFactory LoggingRule rtachi
% LocalWords:  ConsoleRowHighlightingRule Profir Petru Partachi NFG
% LocalWords:  Flexeme FSE2020 lexemes neighbourhoods neighbourhood
% LocalWords:  Weisfeiler Heddle hypothesise Herzig datapoints Roover
% LocalWords:  Pattison ShenZKZWLJ2021 SmartCommit FSE2021 Zeller aug
% LocalWords:  clusterings EBNF ExpandPlaceholder DeleteIncrement Xu
% LocalWords:  lnsertElementlnEmptyList PrelnsertElementlnList Andr
% LocalWords:  PostlnsertElementlnList ExtendIdentifier ShenZZLJW2019
% LocalWords:  GhiottoMBvdH2020 authorASCII1 authorASCII2 NEEDaddress
% LocalWords:  EstlerNFM2014 DVCS ernst MusluSBE2015 Swart ASE2015 PY
% LocalWords:  KirinukiHHK2016 Kirinuki Hiroyuki Higo Yoshiki Hotta
% LocalWords:  Keisuke Kusumoto Shinji APSEC1026 ArgoUML Siyu Yao EP
% LocalWords:  Shengbin Feng Internetware TianZSJL2022 Yingchen Tian
% LocalWords:  Klaas Stol ICSE2022 MyFile uncurated JOUR Nugroho Hata
% LocalWords:  Yusuf Sulistyo Hideaki Matsumoto Kenichi VL APSEC2016
% LocalWords:  Nugroho2020 ChenXYX2022 Internetware2022 CoRA João von
% LocalWords:  jFSTMerge IWCES'17 difftastic Baudry Spork Koegel RCS
% LocalWords:  Herrmannsdoerfer Markus Wesendonk Helming Malaga IWMCP
% LocalWords:  Seyboth CVSM Vion Dury DocEng Lamine Abdessalem Talel
% LocalWords:  Senellart Kudrjavets Gunnar Nachiappan Rastogi Ayushi
% LocalWords:  Gerrit Phabricator SLOC Georgios Gousios GHTorrent GH
% LocalWords:  Ilya Grisgorik BigQuery githubarchive Nuthan Munaiah
% LocalWords:  Kroh Cabrey Meiyappan Engg Zaidman Eirini Kalliamvakou
% LocalWords:  PRs H1 H4 centric SemanGit Kubitza ockmann Graux Vadim
% LocalWords:  Markovtsev Waren Landman Serebrenik Jurgen Vinju ICSME
% LocalWords:  Sourcerer Qualitas Ewan Tempero Anslow Jing Lumpe GZ2014
% LocalWords:  APSEC acmid Passos Czarnecki Krzysztof Fasano Oliveto
% LocalWords:  Santonicola Domenico Subclipse IWPSE Matthieu Barbier
% LocalWords:  bastien Lugato DSML DSMLs MiSE Munson Boyland ller Uwe
% LocalWords:  Molhado's nnau Borghoff Thao Cheng Linsbauer Lukas
% LocalWords:  Egyed Herrejon Junqueira Bittar Thiago Renata SIGDOC
% LocalWords:  issn