From a38d72a9deca28f92a8caff6b236ab08455747a9 Mon Sep 17 00:00:00 2001 From: Tatsuhiko Kubo Date: Sat, 19 Mar 2016 18:18:05 +0900 Subject: [PATCH] init --- .gitignore | 31 ++ .travis.yml | 16 + CONTRIBUTORS | 2 + COPYING | 30 ++ ChangeLog | 258 ++++++++++++ README.md | 683 +++++++++++++++++++++++++++++++ SConstruct | 7 + dtl/Diff.hpp | 692 ++++++++++++++++++++++++++++++++ dtl/Diff3.hpp | 245 +++++++++++ dtl/Lcs.hpp | 55 +++ dtl/Sequence.hpp | 65 +++ dtl/Ses.hpp | 132 ++++++ dtl/dtl.hpp | 47 +++ dtl/functors.hpp | 137 +++++++ dtl/variables.hpp | 142 +++++++ examples/SConstruct | 42 ++ examples/bdiff.cpp | 73 ++++ examples/common.cpp | 20 + examples/common.hpp | 13 + examples/fpatch.cpp | 73 ++++ examples/intdiff.cpp | 49 +++ examples/intdiff3.cpp | 57 +++ examples/patch.cpp | 44 ++ examples/printers.hpp | 26 ++ examples/st2ses.cpp | 35 ++ examples/strdiff.cpp | 42 ++ examples/strdiff3.cpp | 35 ++ examples/strdiff_cp.cpp | 40 ++ examples/unidiff.cpp | 111 +++++ examples/unistrdiff.cpp | 40 ++ test/Intdifftest.cpp | 144 +++++++ test/Objdifftest.cpp | 116 ++++++ test/Patchtest.cpp | 98 +++++ test/SConstruct | 53 +++ test/Strdiff3test.cpp | 208 ++++++++++ test/Strdifftest.cpp | 299 ++++++++++++++ test/comparators.hpp | 24 ++ test/dtl_test.cpp | 10 + test/dtl_test_common.cpp | 56 +++ test/dtl_test_common.hpp | 70 ++++ test/hunks/intdiff/diff_test0 | 0 test/hunks/intdiff/diff_test1 | 2 + test/hunks/intdiff/diff_test2 | 2 + test/hunks/intdiff/diff_test3 | 15 + test/hunks/intdiff/diff_test4 | 8 + test/hunks/strdiff/custom_test0 | 0 test/hunks/strdiff/diff_test0 | 5 + test/hunks/strdiff/diff_test1 | 15 + test/hunks/strdiff/diff_test2 | 10 + test/hunks/strdiff/diff_test3 | 9 + test/hunks/strdiff/diff_test4 | 6 + test/hunks/strdiff/diff_test5 | 0 test/hunks/strdiff/diff_test6 | 2 + test/hunks/strdiff/diff_test7 | 2 + test/hunks/strdiff/diff_test8 | 22 + test/ses/intdiff/diff_test0 | 0 test/ses/intdiff/diff_test1 | 1 + test/ses/intdiff/diff_test2 | 1 + 
test/ses/intdiff/diff_test3 | 14 + test/ses/intdiff/diff_test4 | 7 + test/ses/strdiff/custom_test0 | 3 + test/ses/strdiff/diff_test0 | 4 + test/ses/strdiff/diff_test1 | 14 + test/ses/strdiff/diff_test2 | 9 + test/ses/strdiff/diff_test3 | 8 + test/ses/strdiff/diff_test4 | 6 + test/ses/strdiff/diff_test5 | 0 test/ses/strdiff/diff_test6 | 1 + test/ses/strdiff/diff_test7 | 1 + test/ses/strdiff/diff_test8 | 21 + 70 files changed, 4508 insertions(+) create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 CONTRIBUTORS create mode 100644 COPYING create mode 100644 ChangeLog create mode 100644 README.md create mode 100644 SConstruct create mode 100644 dtl/Diff.hpp create mode 100644 dtl/Diff3.hpp create mode 100644 dtl/Lcs.hpp create mode 100644 dtl/Sequence.hpp create mode 100644 dtl/Ses.hpp create mode 100644 dtl/dtl.hpp create mode 100644 dtl/functors.hpp create mode 100644 dtl/variables.hpp create mode 100644 examples/SConstruct create mode 100644 examples/bdiff.cpp create mode 100644 examples/common.cpp create mode 100644 examples/common.hpp create mode 100644 examples/fpatch.cpp create mode 100644 examples/intdiff.cpp create mode 100644 examples/intdiff3.cpp create mode 100644 examples/patch.cpp create mode 100644 examples/printers.hpp create mode 100644 examples/st2ses.cpp create mode 100644 examples/strdiff.cpp create mode 100644 examples/strdiff3.cpp create mode 100644 examples/strdiff_cp.cpp create mode 100644 examples/unidiff.cpp create mode 100644 examples/unistrdiff.cpp create mode 100644 test/Intdifftest.cpp create mode 100644 test/Objdifftest.cpp create mode 100644 test/Patchtest.cpp create mode 100644 test/SConstruct create mode 100644 test/Strdiff3test.cpp create mode 100644 test/Strdifftest.cpp create mode 100644 test/comparators.hpp create mode 100644 test/dtl_test.cpp create mode 100644 test/dtl_test_common.cpp create mode 100644 test/dtl_test_common.hpp create mode 100644 test/hunks/intdiff/diff_test0 create mode 100644 
test/hunks/intdiff/diff_test1 create mode 100644 test/hunks/intdiff/diff_test2 create mode 100644 test/hunks/intdiff/diff_test3 create mode 100644 test/hunks/intdiff/diff_test4 create mode 100644 test/hunks/strdiff/custom_test0 create mode 100644 test/hunks/strdiff/diff_test0 create mode 100644 test/hunks/strdiff/diff_test1 create mode 100644 test/hunks/strdiff/diff_test2 create mode 100644 test/hunks/strdiff/diff_test3 create mode 100644 test/hunks/strdiff/diff_test4 create mode 100644 test/hunks/strdiff/diff_test5 create mode 100644 test/hunks/strdiff/diff_test6 create mode 100644 test/hunks/strdiff/diff_test7 create mode 100644 test/hunks/strdiff/diff_test8 create mode 100644 test/ses/intdiff/diff_test0 create mode 100644 test/ses/intdiff/diff_test1 create mode 100644 test/ses/intdiff/diff_test2 create mode 100644 test/ses/intdiff/diff_test3 create mode 100644 test/ses/intdiff/diff_test4 create mode 100644 test/ses/strdiff/custom_test0 create mode 100644 test/ses/strdiff/diff_test0 create mode 100644 test/ses/strdiff/diff_test1 create mode 100644 test/ses/strdiff/diff_test2 create mode 100644 test/ses/strdiff/diff_test3 create mode 100644 test/ses/strdiff/diff_test4 create mode 100644 test/ses/strdiff/diff_test5 create mode 100644 test/ses/strdiff/diff_test6 create mode 100644 test/ses/strdiff/diff_test7 create mode 100644 test/ses/strdiff/diff_test8 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..41bdf4d --- /dev/null +++ b/.gitignore @@ -0,0 +1,31 @@ +examples/unidiff +examples/unistrdiff +examples/strdiff +examples/strdiff_cp +examples/strdiff3 +examples/intdiff +examples/bdiff +examples/intdiff3 +examples/patch +examples/fpatch +examples/st2ses +test/strdiff3_test +test/*/*/*_ +*.o +*.orig +*~ +.semantic.cache +.DS_Store +*.swp +GPATH +GRTAGS +GSYMS +GTAGS +TAGS +dtl_test +.sconsign.dblite +.sconf_temp +config.log +^\# +\#$ +\.\# diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..49cda0f --- /dev/null +++ 
b/.travis.yml @@ -0,0 +1,16 @@ +language: cpp +compiler: + - gcc + - clang +before_script: + - wget http://googletest.googlecode.com/files/gtest-1.7.0.zip + - unzip -q gtest-1.7.0.zip + - cd gtest-1.7.0 + - ./configure + - make + - cd .. +script: + - cd examples + - scons + - cd ../test + - GTEST_ROOT=$PWD/../gtest-1.7.0 scons check diff --git a/CONTRIBUTORS b/CONTRIBUTORS new file mode 100644 index 0000000..d02130a --- /dev/null +++ b/CONTRIBUTORS @@ -0,0 +1,2 @@ +Tatsuhiko Kubo +Jan Weiß diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..50b2ef0 --- /dev/null +++ b/COPYING @@ -0,0 +1,30 @@ +In short, Diff Template Library is distributed under so called "BSD license", + +Copyright (c) 2015 Tatsuhiko Kubo +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of the authors nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..e2f22db --- /dev/null +++ b/ChangeLog @@ -0,0 +1,258 @@ +2015-05-03 Tatsuhiko Kubo + + * added some minor changes. + + * 1.19 released + +2013-01-10 Tatsuhiko Kubo + + * add printSES function for custom Printer Thanks to Hu Shubin + + * remove files related mercurial + + * 1.18 released + +2012-11-18 Tatsuhiko Kubo + + * add version-string variable + + * replace get_current_dir_name to getcwd(get_current_dir_name is not in Mountain Lion) + + * fix small typo + + * 1.17 released + +2012-10-26 Tatsuhiko Kubo , Jan Weiß + + * Improving comments + + * rewrite README with markdown-style + + * support gtest1.6 + + * allow clients to handle if DTL swaps the arrays it is passed + + * fix incorrect results for SES_COMMON when swapped + + * add explicit initialization of ses + + * implement option to force the order of sesElem objects in the ses + + * fix problem -> issues/4(test succeeds when result-file does not exist) + + * 1.16 released + +2011-11-10 Tatsuhiko Kubo + + * remove unused variable + + * 1.15 released + +2011-06-18 Tatsuhiko Kubo + + * fix Issue #7 (isOnlyOneOperation returns incorrect value) + + * 1.14 released + +2011-04-10 Tatsuhiko Kubo + + * add check for libraries with SCons + + * add installer with SCons + + * add sample of diff(bdiff) + + * 1.13 released + +2011-01-23 Tatsuhiko Kubo + + * forget to add template parameter of stream for print methods of printUnifiedFormat 
+ + * 1.12 released + +2011-01-01 Tatsuhiko Kubo + + * add template parameter of stream for print methods + + * function of importing ses + + * 1.11 released + +2010-11-23 Tatsuhiko Kubo + + * use static_cast instead of C-style cast + + * remove specifyConfliction + + * 1.10 released + +2010-09-20 Tatsuhiko Kubo + + * fix bug of specifyConfliction + + * add parameter for Diff3's comparator + + * fix the bug which uniPatch fails + + * add debug option for examples + + * divide test sources + + * 1.09 released + +2010-08-29 Tatsuhiko Kubo + + * fix the problem that comparator parameter is completely ignored. + + * remove the warning 'comparison of unsigned expression >= 0 is always true' ( use -W ) + + * move directory 'src' to 'dtl' + + * remove Makefile for examples and test + + * 1.08 released + +2010-07-11 Tatsuhiko Kubo + + * fix potential risk on 64bit environment(size of 'int' and size of 'size_t') + + * add scons + + * fix bug of specifyConfliction + + * fix memory leak when onlyEditDistance is true + + * change indent size from 2 to 4 + + * 1.07 released + +2010-05-01 Tatsuhiko Kubo + + * add specifyConfliction + + * 1.06 released + +2010-04-13 Tatsuhiko Kubo + + * constructors takes referenced parameter + + * fix seg fault bug (http://code.google.com/p/dtl-cpp/issues/detail?id=2) + + * 1.05 released + +2009-12-01 Tatsuhiko Kubo + + * add test programs with googletest + + * add sample of diff(uintdiff) + + * 1.04 released + +2009-10-02 Tatsuhiko Kubo + + * add function getLcsVec + + * divide header files + + * 1.03 released + +2009-09-08 Tatsuhiko Kubo + + * rename editType to edit_t + + * add print functions + + * add functor of compare + + * use 'using' declaration + + * refactoring + + * add ignore patterns for Mercurial + + * 1.02 released. + +2009-08-08 Tatsuhiko Kubo + + * append appropriate const keyword + + * refactoring + + * 1.01 released. 
+ +2009-07-04 Tatsuhiko Kubo + + * resolve problem memory leak occurs when default copy constructor set in motion. + + * 1.00 released. + +2009-06-08 Tatsuhiko Kubo + + * enhance readability + + * change behavior when conflicted + + * decliment parameter for patch function + + * 0.07 released. + +2009-05-08 Tatsuhiko Kubo + + * add flag for recording only editdistance + + * add sample of unidiff for string (unistrdiff) + + * add unserious diff. ver 0.03 has this feture. + + * fix bug for merge + + * add function getUniHunks + + * 0.06 released. + +2008-12-12 Tatsuhiko Kubo + + * add sample of diff3 (strdiff3, intdiff3) + + * add diff3 + + * + -> - to - -> + with Unified Diff Format + + * add function uniPatch. this function can patch with Unified Diff Format Data Structure + + * dtl Import Unified Diff Format Data Structure from unidiff.cpp. + + * fix bug. function check whether file exists(common.cpp) + + * 0.05 released. + +2008-11-10 Tatsuhiko Kubo + + * Improves accuracy of diff + + * fix serious memory bug for patch + + * changed type of argument in function fileExists(common.hpp, common.cpp) + + * 0.04 released. + +2008-11-06 Tatsuhiko Kubo + + * add erorr check for sample programs + + * 0.03 released. + +2008-10-31 Tatsuhiko Kubo + + * rename ChangLog to ChangeLog + + * modifiy README + + * output OK message on patch and fpatch + + * 0.02 released. + +2008-10-30 Tatsuhiko Kubo + + * 0.01 released. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..6d4faa7 --- /dev/null +++ b/README.md @@ -0,0 +1,683 @@ +# dtl + +[![Build Status](https://travis-ci.org/cubicdaiya/dtl.png?branch=master)](https://travis-ci.org/cubicdaiya/dtl) + +`dtl` is the diff template library written in C++. The name of template is derived C++'s Template. 
+ +# Table of contents + + * [Features](#features) + * [Getting started](#getting-started) + * [Compare two strings](#compare-two-strings) + * [Compare two data has arbitrary type](#compare-two-data-has-arbitrary-type) + * [Merge three sequences](#merge-three-sequences) + * [Patch function](#patch-function) + * [Difference as Unified Format](#difference-as-unified-format) + * [Compare large sequences](#compare-large-sequences) + * [Unserious difference](#unserious-difference) + * [Calculate only Edit Distance](#calculate-only-edit-distance) + * [Algorithm](#algorithm) + * [Computational complexity](#computational-complexity) + * [Comparison when difference between two sequences is very large](#comparison-when-difference-between-two-sequences-is-very-large) + * [Implementations with various programming languages](#implementations-with-various-programming-languages) + * [Examples](#examples) + * [strdiff](#strdiff) + * [intdiff](#intdiff) + * [unidiff](#unidiff) + * [unistrdiff](#unistrdiff) + * [strdiff3](#strdiff3) + * [intdiff3](#intdiff3) + * [patch](#patch) + * [fpatch](#fpatch) + * [Running tests](#running-tests) + * [Building test programs](#building-test-programs) + * [Running test programs](#running-test-programs) + * [License](#license) + +# Features + +`dtl` provides the functions for comparing two sequences have arbitrary type. But sequences must support random access\_iterator. + +# Getting started + +To start using this library, all you need to do is include `dtl.hpp`. + +```c++ +#include "dtl/dtl.hpp" +``` + +## Compare two strings + +First of all, calculate the difference between two strings. + +```c++ +typedef char elem; +typedef std::string sequence; +sequence A("abc"); +sequence B("abd"); +dtl::Diff< elem, sequence > d(A, B); +d.compose(); +``` + +When the above code is run, `dtl` calculates the difference between A and B as Edit Distance and LCS and SES. + +The meaning of these three terms is below. 
+ +| Edit Distance | Edit Distance is a numerical value for declaring a difference between two sequences. | +|:--------------|:-----------------------------------------------------------------------------------| +| LCS | LCS stands for Longest Common Subsequence. | +| SES | SES stands for Shortest Edit Script. That is, SES is the shortest course of action for translating one sequence into another sequence.| + +If one sequence is "abc" and another sequence is "abd", Edit Distance and LCS and SES are as below. + +| Edit Distance | 2 | +|:--------------|:----------------| +| LCS | ab | +| SES | C a C b D c A d | + + * 「C」:Common + * 「D」:Delete + * 「A」:ADD + +If you want to know in more detail, please see [examples/strdiff.cpp](https://github.com/cubicdaiya/dtl/blob/master/examples/strdiff.cpp). + +This calculates Edit Distance and LCS and SES of two strings received as command line arguments and prints each. + +When one string is "abc" and another string is "abd", the output of `strdiff` is below. + +```bash +$ ./strdiff abc abd +editDistance:2 +LCS:ab +SES + a + b +-c ++d +$ +``` + +## Compare two data has arbitrary type + +`dtl` can compare data of arbitrary type thanks to C++ templates. + +But the compared data type must support the random access\_iterator. + +In the previous example, string data was compared; + +`dtl` can also compare two int vectors like the example below. + +```c++ +int a[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; +int b[10] = {3, 5, 1, 4, 5, 1, 7, 9, 6, 10}; +std::vector<int> A(&a[0], &a[10]); +std::vector<int> B(&b[0], &b[10]); +dtl::Diff< int > d(A, B); +d.compose(); +``` + +If you want to know in more detail, please see [examples/intdiff.cpp](https://github.com/cubicdaiya/dtl/blob/master/examples/intdiff.cpp). + +## Merge three sequences + +`dtl` has the diff3 function. + +This function merges three sequences. + +Additionally `dtl` detects conflicts. 
+ +```c++ +typedef char elem; +typedef std::string sequence; +sequence A("qqqabc"); +sequence B("abc"); +sequence C("abcdef"); +dtl::Diff3<elem, sequence> diff3(A, B, C); +diff3.compose(); +if (!diff3.merge()) { + std::cerr << "conflict." << std::endl; + return -1; +} +std::cout << "result:" << diff3.getMergedSequence() << std::endl; +``` + +When the above code is run, the output is below. + +```console +result:qqqabcdef +``` + +If you want to know in more detail, please see [examples/strdiff3.cpp](https://github.com/cubicdaiya/dtl/blob/master/examples/strdiff3.cpp). + +## Patch function + +`dtl` can also translate one sequence to another sequence with SES. + +```c++ +typedef char elem; +typedef std::string sequence; +sequence A("abc"); +sequence B("abd"); +dtl::Diff<elem, sequence> d(A, B); +d.compose(); +string s1(A); +string s2 = d.patch(s1); +``` + +When the above code is run, s2 becomes "abd". +The SES of A("abc") and B("abd") is below. + +```console +Common a +Common b +Delete c +Add d +``` + +The patch function applies the SES to the sequence passed as its argument. +For this example, "abc" is translated to "abd" with the above SES. + +Please see dtl's header files about the data structure of SES. + +## Difference as Unified Format + +`dtl` can also treat a difference as Unified Format. See the example below. + +```c++ +typedef char elem; +typedef std::string sequence; +sequence A("acbdeaqqqqqqqcbed"); +sequence B("acebdabbqqqqqqqabed"); +dtl::Diff<elem, sequence> d(A, B); +d.compose(); // construct an edit distance and LCS and SES +d.composeUnifiedHunks(); // construct a difference as Unified Format with SES. +d.printUnifiedFormat(); // print a difference as Unified Format. +``` + +The difference as Unified Format of "acbdeaqqqqqqqcbed" and "acebdabbqqqqqqqabed" is below. + +```diff +@@ -1,9 +1,11 @@ + a + c ++e + b + d +-e + a ++b ++b + q + q + q +@@ -11,7 +13,7 @@ + q + q + q +-c ++a + b + e + d +``` + +The data structure of a Unified Format hunk is below. 
+ +```c++ +/** + * Structure of Unified Format Hunk + */ +template <typename sesElem> +struct uniHunk { + int a, b, c, d; // @@ -a,b +c,d @@ + std::vector<sesElem> common[2]; // anteroposterior commons on changes + std::vector<sesElem> change; // changes + int inc_dec_count; // count of increase and decrease +}; +``` + +The actual blocks of Unified Format are stored as a vector of this structure. + +If you want to know in more detail, please see [examples/unistrdiff.cpp](https://github.com/cubicdaiya/dtl/blob/master/examples/unistrdiff.cpp) +and [examples/unidiff.cpp](https://github.com/cubicdaiya/dtl/blob/master/examples/unidiff.cpp) and dtl's header files. + +In addition, `dtl` has a function that translates one sequence to another sequence with Unified Format. + +```c++ +typedef char elem; +typedef std::string sequence; +sequence A("abc"); +sequence B("abd"); +dtl::Diff<elem, sequence> d(A, B); +d.compose(); +d.composeUnifiedHunks(); +string s1(A); +string s2 = d.uniPatch(s1); +``` + +When the above code is run, s2 becomes "abd". +The uniPatch function applies the Unified Format blocks to the sequence passed as its argument. + +For this example, "abc" is translated to "abd" with the Unified Format block below. + +```diff +@@ -1,3 +1,3 @@ + a + b +-c ++d +``` + +## Compare large sequences + +When comparing two large sequences, `dtl` can optimize the calculation of difference with the onHuge function. + +But this function can only be used when the compared data type is std::vector. + +When you use this function, call it before calling the compose function. + +```c++ +typedef char elem; +typedef std::vector<elem> sequence; +sequence A; +sequence B; +/* ・・・ */ +dtl::Diff< elem, sequence > d(A, B); +d.onHuge(); +d.compose(); +``` + +## Unserious difference + +The calculation of difference is very heavy. +`dtl` uses An O(NP) Sequence Comparison Algorithm. + +Though this Algorithm is sufficiently fast, +when the difference between two sequences is very large, + +the calculation of LCS and SES needs massive amounts of memory. 
+ +`dtl` avoids above-described problem by dividing each sequence into plural subsequences +and joining the difference of each subsequence finally. + +As this way repeats allocating massive amounts of memory, +`dtl` provides other way. It is the way of calculating unserious difference. + +For example, The normal SES of "abc" and "abd" is below. + +```console +Common a +Common b +Delete c +Add d +``` + +The unserious SES of "abc" and "abd" is below. + +```console +Delete a +Delete b +Delete c +Add a +Add b +Add d +``` + +Of course, when "abc" and "abd" are compared with `dtl`, above difference is not derived. + +`dtl` calculates the unserious difference when `dtl` judges the calculation of LCS and SES +needs massive amounts of memory and unserious difference function is ON. + +`dtl` joins the calculated difference before `dtl` judges it and unserious difference finally. + +As a result, all difference is not unserious difference when unserious difference function is ON. + +When you use this function, you may call this function before calling compose function. + +```c++ +typedef char elem; +typedef std::string sequence; +sequence A("abc"); +sequence B("abd"); +dtl::Diff< elem, sequence > d(A, B); +d.onUnserious(); +d.compose(); +``` + +## Calculate only Edit Distance + +As using onOnlyEditDistance, `dtl` calculates the only edit distance. + +If you need only edit distance, you may use this function, +because the calculation of edit distance is lighter than the calculation of LCS and SES. + +When you use this function, you may call this function before calling compose function. + +```c++ +typedef char elem; +typedef std::string sequence; +sequence A("abc"); +sequence B("abd"); +dtl::Diff< elem, sequence > d(A, B); +d.onOnlyEditDistance(); +d.compose(); +``` + +# Algorithm + +The algorithm `dtl` uses is based on "An O(NP) Sequence Comparison Algorithm" by described by Sun Wu, Udi Manber and Gene Myers. 
+ +An O(NP) Sequence Comparison Algorithm (hereafter, Wu's O(NP) Algorithm) is an efficient algorithm for comparing two sequences. + +## Computational complexity + +The computational complexity of Wu's O(NP) Algorithm is O(N+PD) on average and O(NP) in the worst case. + +## Comparison when difference between two sequences is very large + +Calculating LCS and SES efficiently in every case is a little difficult. + +This is because the calculation of LCS and SES needs massive amounts of memory when the difference between two sequences is very large. + +A program that uses the algorithm without considering this can run out of memory in the worst case. + +`dtl` avoids the above-described problem by dividing each sequence into several subsequences and finally joining the differences of the subsequences. (This feature is supported after version 0.04.) + +## Implementations with various programming languages + +Implementations of Wu's O(NP) Algorithm in various programming languages are available below. + +https://github.com/cubicdaiya/onp + +# Examples + +There are examples in [dtl/examples](https://github.com/cubicdaiya/dtl/tree/master/examples). +`dtl` uses SCons for building examples and tests. To build and run the examples and tests, install SCons. + +## strdiff + +`strdiff` calculates a difference between two string sequences, but multibyte characters are not supported. + +```bash +$ cd dtl/examples +$ scons strdiff +$ ./strdiff acbdeacbed acebdabbabed +editDistance:6 +LCS:acbdabed +SES + a + c ++ e + b + d +- e + a +- c + b ++ b ++ a ++ b + e + d +$ +``` + +## intdiff + +`intdiff` calculates a difference between two int array sequences. 
+ +```bash +$ cd dtl/examples +$ scons intdiff +$ ./intdiff # There are data in intdiff.cpp +1 2 3 4 5 6 7 8 9 10 +3 5 1 4 5 1 7 9 6 10 +editDistance:8 +LCS: 3 4 5 7 9 10 +SES +- 1 +- 2 + 3 ++ 5 ++ 1 + 4 + 5 +- 6 ++ 1 + 7 +- 8 + 9 ++ 6 + 10 +$ +``` + +## unidiff + +`unidiff` calculates a difference between two text file sequences, +and outputs the difference between the files in unified format. + +```bash +$ cd dtl/examples +$ scons unidiff +$ cat a.txt +a +e +c +z +z +d +e +f +a +b +c +d +e +f +g +h +i +$ cat b.txt +a +d +e +c +f +e +a +b +c +d +e +f +g +h +i +$ ./unidiff a.txt b.txt +--- a.txt 2008-08-26 07:03:28 +0900 ++++ b.txt 2008-08-26 03:02:42 +0900 +@@ -1,11 +1,9 @@ + a +-e +-c +-z +-z + d + e ++c + f ++e + a + b + c +$ +``` + +## unistrdiff + +`unistrdiff` calculates a difference between two string sequences, +and outputs the difference between the strings in unified format. + +```bash +$ cd dtl/examples +$ scons unistrdiff +$ ./unistrdiff acbdeacbed acebdabbabed +editDistance:6 +LCS:acbdabed +@@ -1,10 +1,12 @@ + a + c ++e + b + d +-e + a +-c + b ++b ++a ++b + e + d +$ +``` + +## strdiff3 + +`strdiff3` merges three string sequences and outputs the merged sequence. +When a conflict occurs, it outputs the string "conflict.". + +```bash +$ cd dtl/examples +$ scons strdiff3 +$ ./strdiff3 qabc abc abcdef +result:qabcdef +$ +``` + +The output below is produced when a conflict occurs. + +```bash +$ ./strdiff3 adc abc aec +conflict. +$ +``` + +## intdiff3 + +`intdiff3` merges three integer sequences (vectors) and outputs the merged sequence. + +```bash +$ cd dtl/examples +$ scons intdiff3 +$ ./intdiff3 +a:1 2 3 4 5 6 7 3 9 10 +b:1 2 3 4 5 6 7 8 9 10 +c:1 2 3 9 5 6 7 8 9 10 +s:1 2 3 9 5 6 7 3 9 10 +intdiff3 OK +$ +``` + +## patch + +`patch` is a test program. Given two strings A and B, +`patch` translates A to B with Shortest Edit Script or unified format difference. 
+ +```bash +$ cd dtl/examples +$ scons patch +$ ./patch abc abd +before:abc +after :abd +patch successed +before:abc +after :abd +unipatch successed +$ +``` + +## fpatch + +`fpatch` is a test program. Given two files A and B, +`fpatch` translates A to B with Shortest Edit Script or unified format difference. + +```bash +$ cd dtl/examples +$ scons fpatch +$ cat a.txt +a +e +c +z +z +d +e +f +a +b +c +d +e +f +g +h +i +$ cat b.txt +$ cat b.txt +a +d +e +c +f +e +a +b +c +d +e +f +g +h +i +$ ./fpatch a.txt b.txt +fpatch successed +unipatch successed +$ +``` + +# Running tests + +`dtl` uses [googletest](http://code.google.com/p/googletest/) and [SCons](http://www.scons.org/) for testing itself. + +# Building test programs + +To build the test programs for `dtl`, run `scons` in the test directory. + +```bash +$ GTEST_ROOT=${gtest_root_dir} scons +``` + +# Running test programs + +To run all tests for `dtl`, run 'scons check' in the test directory. (gtest must already be compiled) + +```bash +$ GTEST_ROOT=${gtest_root_dir} scons check +``` + +To run only some of the tests, you may execute `dtl_test` directly after you run `scons`. +The following command is an example that runs only Strdifftest. + +```bash +$ ./dtl_test --gtest_filter='Strdifftest.*' +``` + +`--gtest_filter` is a googletest option. googletest has many useful functions for testing software flexibly. +If you want to know other functions of googletest, run `./dtl_test --help`. + + +# License + +Please read the file [COPYING](https://github.com/cubicdaiya/dtl/blob/master/COPYING). 
diff --git a/SConstruct b/SConstruct new file mode 100644 index 0000000..3e0b29e --- /dev/null +++ b/SConstruct @@ -0,0 +1,7 @@ +import os + +# install script + +prefix = ARGUMENTS.get('prefix', '/usr/local') +headers = Glob('dtl/*.hpp') +Alias('install', Install(os.path.join(prefix, 'dtl', 'include'), headers)) diff --git a/dtl/Diff.hpp b/dtl/Diff.hpp new file mode 100644 index 0000000..222d9ad --- /dev/null +++ b/dtl/Diff.hpp @@ -0,0 +1,692 @@ +/** + dtl -- Diff Template Library + + In short, Diff Template Library is distributed under so called "BSD license", + + Copyright (c) 2015 Tatsuhiko Kubo + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of the authors nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* If you use this library, you must include dtl.hpp only. */
+
+#ifndef DTL_DIFF_H
+#define DTL_DIFF_H
+
+namespace dtl {
+
+    /**
+     * diff class template: holds two sequences and the edit distance, LCS, SES and unified hunks computed from them.
+     * sequence must support random_access_iterator.
+     */
+    template <typename elem, typename sequence = vector< elem >, typename comparator = Compare< elem > >
+    class Diff
+    {
+    private :
+        dtl_typedefs(elem, sequence)
+        sequence           A;                // first input sequence (copied)
+        sequence           B;                // second input sequence (copied)
+        size_t             M;
+        size_t             N;
+        size_t             delta;
+        size_t             offset;
+        long long          *fp;              // work array: allocated and released inside compose()
+        long long          editDistance;     // accumulated by compose()
+        Lcs< elem >        lcs;
+        Ses< elem >        ses;
+        editPath           path;
+        editPathCordinates pathCordinates;
+        bool               swapped;
+        bool               huge;             // see onHuge() / enableHuge()
+        bool               trivial;          // see onUnserious() / enableTrivial()
+        bool               editDistanceOnly; // compose() stops after the edit distance when set
+        uniHunkVec         uniHunks;         // consumed by uniPatch() and printUnifiedFormat()
+        comparator         cmp;
+    public :
+        Diff () : M(0), N(0), delta(0), offset(0), fp(0), editDistance(0), swapped(false), huge(false), trivial(false), editDistanceOnly(false) {} // defined zero state instead of indeterminate scalars
+
+        Diff (const sequence& a,
+              const sequence& b) : A(a), B(b), ses(false) {
+            init();
+        }
+
+        Diff (const sequence& a,
+              const sequence& b,
+              bool deletesFirst) : A(a), B(b), ses(deletesFirst) {
+            init();
+        }
+
+        Diff (const sequence& a,
+              const sequence& b,
+              const comparator& comp) : A(a), B(b), ses(false), cmp(comp) {
+            init();
+        }
+
+        Diff (const sequence& a,
+              const sequence& b,
+              bool deleteFirst,
+              const comparator& comp) : A(a), B(b), ses(deleteFirst), cmp(comp) {
+            init();
+        }
+
+        ~Diff() {}
+
+        long long getEditDistance () const {
+            return editDistance;
+        }
+
+        Lcs< elem > getLcs () const {
+            return lcs;
+        }
+
+        elemVec getLcsVec () const {
+            return lcs.getSequence();
+        }
+
+        Ses< elem > getSes () const {
+            return ses;
+        }
+
+        uniHunkVec getUniHunks () const {
+
return uniHunks;
+        }
+
+        /* These should be deprecated */
+        bool isHuge () const {
+            return huge;
+        }
+
+        void onHuge () {
+            this->huge = true;
+        }
+
+        void offHuge () {
+            this->huge = false;
+        }
+
+        bool isUnserious () const {
+            return trivial;
+        }
+
+        void onUnserious () {
+            this->trivial = true;
+        }
+
+        void offUnserious () {
+            this->trivial = false;
+        }
+
+        void onOnlyEditDistance () {
+            this->editDistanceOnly = true;
+        }
+
+        /* These are the replacements for the above */
+        bool hugeEnabled () const {
+            return huge;
+        }
+
+        void enableHuge () {
+            this->huge = true;
+        }
+
+        void disableHuge () {
+            this->huge = false;
+        }
+
+        bool trivialEnabled () const {
+            return trivial;
+        }
+
+        void enableTrivial () { // not const: it assigns the trivial flag
+            this->trivial = true;
+        }
+
+        void disableTrivial () {
+            this->trivial = false;
+        }
+
+        void editDistanceOnlyEnabled () { // NOTE: despite its name this is a setter, same effect as onOnlyEditDistance()
+            this->editDistanceOnly = true;
+        }
+
+        /**
+         * patching with Unified Format Hunks: applies the stored uniHunks to a copy of seq and returns the patched copy
+         */
+        sequence uniPatch (const sequence& seq) {
+            elemList        seqLst(seq.begin(), seq.end());
+            sesElemVec      shunk;
+            sesElemVec_iter vsesIt;
+            elemList_iter   lstIt         = seqLst.begin();
+            long long       inc_dec_total = 0; // net length change caused by the hunks applied so far
+            long long       gap           = 1; // 1-based position in seqLst that lstIt currently points at
+            for (uniHunkVec_iter it=uniHunks.begin();it!=uniHunks.end();++it) {
+                joinSesVec(shunk, it->common[0]);
+                joinSesVec(shunk, it->change);
+                joinSesVec(shunk, it->common[1]);
+                const long long hunkStart = it->a + inc_dec_total; // shifted start; the stored hunk is left unmodified so uniPatch can be called again
+                inc_dec_total += it->inc_dec_count;
+                for (long long i=0;i<hunkStart - gap && lstIt != seqLst.end();++i) { // skip to the hunk start, never past the end
+                    ++lstIt;
+                }
+                gap = hunkStart + it->b + it->inc_dec_count;
+                vsesIt = shunk.begin();
+                while (vsesIt!=shunk.end()) {
+                    switch (vsesIt->second.type) {
+                    case SES_ADD :
+                        seqLst.insert(lstIt, vsesIt->first);
+                        break;
+                    case SES_DELETE :
+                        if (lstIt != seqLst.end()) {
+                            lstIt = seqLst.erase(lstIt);
+                        }
+                        break;
+                    case SES_COMMON :
+                        if (lstIt != seqLst.end()) {
+                            ++lstIt;
+                        }
+                        break;
+                    default :
+                        // unknown edit type: nothing to apply
+                        break;
+                    }
+                    ++vsesIt;
+                }
+                shunk.clear();
+            }
+
+            sequence patchedSeq(seqLst.begin(), seqLst.end());
+            return patchedSeq;
+        }
+
+        /**
+         *
patching with Shortest Edit Script (SES) + */ + sequence patch (const sequence& seq) const { + sesElemVec sesSeq = ses.getSequence(); + elemList seqLst(seq.begin(), seq.end()); + elemList_iter lstIt = seqLst.begin(); + for (sesElemVec_iter sesIt=sesSeq.begin();sesIt!=sesSeq.end();++sesIt) { + switch (sesIt->second.type) { + case SES_ADD : + seqLst.insert(lstIt, sesIt->first); + break; + case SES_DELETE : + lstIt = seqLst.erase(lstIt); + break; + case SES_COMMON : + ++lstIt; + break; + default : + // no through + break; + } + } + sequence patchedSeq(seqLst.begin(), seqLst.end()); + return patchedSeq; + } + + /** + * compose Longest Common Subsequence and Shortest Edit Script. + * The algorithm implemented here is based on "An O(NP) Sequence Comparison Algorithm" + * described by Sun Wu, Udi Manber and Gene Myers + */ + void compose() { + + if (isHuge()) { + pathCordinates.reserve(MAX_CORDINATES_SIZE); + } + + long long p = -1; + fp = new long long[M + N + 3]; + fill(&fp[0], &fp[M + N + 3], -1); + path = editPath(M + N + 3); + fill(path.begin(), path.end(), -1); + ONP: + do { + ++p; + for (long long k=-p;k<=static_cast(delta)-1;++k) { + fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]); + } + for (long long k=static_cast(delta)+p;k>=static_cast(delta)+1;--k) { + fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]); + } + fp[delta+offset] = snake(static_cast(delta), fp[delta-1+offset]+1, fp[delta+1+offset]); + } while (fp[delta+offset] != static_cast(N) && pathCordinates.size() < MAX_CORDINATES_SIZE); + + editDistance += static_cast(delta) + 2 * p; + long long r = path[delta+offset]; + P cordinate; + editPathCordinates epc(0); + + // recording edit distance only + if (editDistanceOnly) { + delete[] this->fp; + return; + } + + while(r != -1) { + cordinate.x = pathCordinates[(size_t)r].x; + cordinate.y = pathCordinates[(size_t)r].y; + epc.push_back(cordinate); + r = pathCordinates[(size_t)r].k; + } + + // record Longest Common Subsequence & Shortest Edit 
Script + if (!recordSequence(epc)) { + pathCordinates.resize(0); + epc.resize(0); + p = -1; + goto ONP; + } + delete[] this->fp; + } + + /** + * print difference between A and B as an SES + */ + template < typename stream > + void printSES (stream& out) const { + sesElemVec ses_v = ses.getSequence(); + for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out)); + } + + void printSES (ostream& out = cout) const { + printSES< ostream >(out); + } + + /** + * print differences given an SES + */ + template < typename stream > + static void printSES (const Ses< elem >& s, stream& out) { + sesElemVec ses_v = s.getSequence(); + for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out)); + } + + static void printSES (const Ses< elem >& s, ostream& out = cout) { + printSES< ostream >(s, out); + } + + /** + * print difference between A and B as an SES with custom printer + */ + template < typename stream, template < typename SEET, typename STRT > class PT > + void printSES (stream& out) const { + sesElemVec ses_v = ses.getSequence (); + for_each (ses_v.begin (), ses_v.end(), PT < sesElem, stream > (out)); + } + + /** + * print difference between A and B in the Unified Format + */ + template < typename stream > + void printUnifiedFormat (stream& out) const { + for_each(uniHunks.begin(), uniHunks.end(), UniHunkPrinter< sesElem, stream >(out)); + } + + void printUnifiedFormat (ostream& out = cout) const { + printUnifiedFormat< ostream >(out); + } + + /** + * print unified format difference with given unified format hunks + */ + template < typename stream > + static void printUnifiedFormat (const uniHunkVec& hunks, stream& out) { + for_each(hunks.begin(), hunks.end(), UniHunkPrinter< sesElem >(out)); + } + + static void printUnifiedFormat (const uniHunkVec& hunks, ostream& out = cout) { + printUnifiedFormat< ostream >(hunks, out); + } + + /** + * compose Unified Format Hunks from Shortest Edit Script + */ + void composeUnifiedHunks () { + 
sesElemVec common[2]; + sesElemVec change; + sesElemVec ses_v = ses.getSequence(); + long long l_cnt = 1; + long long length = distance(ses_v.begin(), ses_v.end()); + long long middle = 0; + bool isMiddle, isAfter; + elemInfo einfo; + long long a, b, c, d; // @@ -a,b +c,d @@ + long long inc_dec_count = 0; + uniHunk< sesElem > hunk; + sesElemVec adds; + sesElemVec deletes; + + isMiddle = isAfter = false; + a = b = c = d = 0; + + for (sesElemVec_iter it=ses_v.begin();it!=ses_v.end();++it, ++l_cnt) { + einfo = it->second; + switch (einfo.type) { + case SES_ADD : + middle = 0; + ++inc_dec_count; + adds.push_back(*it); + if (!isMiddle) isMiddle = true; + if (isMiddle) ++d; + if (l_cnt >= length) { + joinSesVec(change, deletes); + joinSesVec(change, adds); + isAfter = true; + } + break; + case SES_DELETE : + middle = 0; + --inc_dec_count; + deletes.push_back(*it); + if (!isMiddle) isMiddle = true; + if (isMiddle) ++b; + if (l_cnt >= length) { + joinSesVec(change, deletes); + joinSesVec(change, adds); + isAfter = true; + } + break; + case SES_COMMON : + ++b;++d; + if (common[1].empty() && adds.empty() && deletes.empty() && change.empty()) { + if (static_cast(common[0].size()) < DTL_CONTEXT_SIZE) { + if (a == 0 && c == 0) { + if (!wasSwapped()) { + a = einfo.beforeIdx; + c = einfo.afterIdx; + } else { + a = einfo.afterIdx; + c = einfo.beforeIdx; + } + } + common[0].push_back(*it); + } else { + rotate(common[0].begin(), common[0].begin() + 1, common[0].end()); + common[0].pop_back(); + common[0].push_back(*it); + ++a;++c; + --b;--d; + } + } + if (isMiddle && !isAfter) { + ++middle; + joinSesVec(change, deletes); + joinSesVec(change, adds); + change.push_back(*it); + if (middle >= DTL_SEPARATE_SIZE || l_cnt >= length) { + isAfter = true; + } + adds.clear(); + deletes.clear(); + } + break; + default : + // no through + break; + } + // compose unified format hunk + if (isAfter && !change.empty()) { + sesElemVec_iter cit = it; + long long cnt = 0; + for (long long 
i=0;isecond.type == SES_COMMON) { + ++cnt; + } + } + if (cnt < DTL_SEPARATE_SIZE && l_cnt < length) { + middle = 0; + isAfter = false; + continue; + } + if (static_cast(common[0].size()) >= DTL_SEPARATE_SIZE) { + long long c0size = static_cast(common[0].size()); + rotate(common[0].begin(), + common[0].begin() + (size_t)c0size - DTL_SEPARATE_SIZE, + common[0].end()); + for (long long i=0;i + static Ses< elem > composeSesFromStream (stream& st) + { + elem line; + Ses< elem > ret; + long long x_idx, y_idx; + x_idx = y_idx = 1; + while (getline(st, line)) { + elem mark(line.begin(), line.begin() + 1); + elem e(line.begin() + 1, line.end()); + if (mark == SES_MARK_DELETE) { + ret.addSequence(e, x_idx, 0, SES_DELETE); + ++x_idx; + } else if (mark == SES_MARK_ADD) { + ret.addSequence(e, y_idx, 0, SES_ADD); + ++y_idx; + } else if (mark == SES_MARK_COMMON) { + ret.addSequence(e, x_idx, y_idx, SES_COMMON); + ++x_idx; + ++y_idx; + } + } + return ret; + } + + private : + /** + * initialize + */ + void init () { + M = distance(A.begin(), A.end()); + N = distance(B.begin(), B.end()); + if (M < N) { + swapped = false; + } else { + swap(A, B); + swap(M, N); + swapped = true; + } + editDistance = 0; + delta = N - M; + offset = M + 1; + huge = false; + trivial = false; + editDistanceOnly = false; + fp = NULL; + } + + /** + * search shortest path and record the path + */ + long long snake(const long long& k, const long long& above, const long long& below) { + long long r = above > below ? path[(size_t)k-1+offset] : path[(size_t)k+1+offset]; + long long y = max(above, below); + long long x = y - k; + while ((size_t)x < M && (size_t)y < N && (swapped ? 
cmp.impl(B[(size_t)y], A[(size_t)x]) : cmp.impl(A[(size_t)x], B[(size_t)y]))) { + ++x;++y; + } + + path[(size_t)k+offset] = static_cast(pathCordinates.size()); + if (!editDistanceOnly) { + P p; + p.x = x;p.y = y;p.k = r; + pathCordinates.push_back(p); + } + return y; + } + + /** + * record SES and LCS + */ + bool recordSequence (const editPathCordinates& v) { + sequence_const_iter x(A.begin()); + sequence_const_iter y(B.begin()); + long long x_idx, y_idx; // line number for Unified Format + long long px_idx, py_idx; // cordinates + bool complete = false; + x_idx = y_idx = 1; + px_idx = py_idx = 0; + for (size_t i=v.size()-1;!complete;--i) { + while(px_idx < v[i].x || py_idx < v[i].y) { + if (v[i].y - v[i].x > py_idx - px_idx) { + if (!wasSwapped()) { + ses.addSequence(*y, 0, y_idx, SES_ADD); + } else { + ses.addSequence(*y, y_idx, 0, SES_DELETE); + } + ++y; + ++y_idx; + ++py_idx; + } else if (v[i].y - v[i].x < py_idx - px_idx) { + if (!wasSwapped()) { + ses.addSequence(*x, x_idx, 0, SES_DELETE); + } else { + ses.addSequence(*x, 0, x_idx, SES_ADD); + } + ++x; + ++x_idx; + ++px_idx; + } else { + if (!wasSwapped()) { + lcs.addSequence(*x); + ses.addSequence(*x, x_idx, y_idx, SES_COMMON); + } else { + lcs.addSequence(*y); + ses.addSequence(*y, y_idx, x_idx, SES_COMMON); + } + ++x; + ++y; + ++x_idx; + ++y_idx; + ++px_idx; + ++py_idx; + } + } + if (i == 0) complete = true; + } + + if (x_idx > static_cast(M) && y_idx > static_cast(N)) { + // all recording succeeded + } else { + // trivial difference + if (trivialEnabled()) { + if (!wasSwapped()) { + recordOddSequence(x_idx, M, x, SES_DELETE); + recordOddSequence(y_idx, N, y, SES_ADD); + } else { + recordOddSequence(x_idx, M, x, SES_ADD); + recordOddSequence(y_idx, N, y, SES_DELETE); + } + return true; + } + + // nontrivial difference + sequence A_(A.begin() + (size_t)x_idx - 1, A.end()); + sequence B_(B.begin() + (size_t)y_idx - 1, B.end()); + A = A_; + B = B_; + M = distance(A.begin(), A.end()); + N = distance(B.begin(), 
B.end()); + delta = N - M; + offset = M + 1; + delete[] fp; + fp = new long long[M + N + 3]; + fill(&fp[0], &fp[M + N + 3], -1); + fill(path.begin(), path.end(), -1); + return false; + } + return true; + } + + /** + * record odd sequence in SES + */ + void inline recordOddSequence (long long idx, long long length, sequence_const_iter it, const edit_t et) { + while(idx < length){ + ses.addSequence(*it, idx, 0, et); + ++it; + ++idx; + ++editDistance; + } + ses.addSequence(*it, idx, 0, et); + ++editDistance; + } + + /** + * join SES vectors + */ + void inline joinSesVec (sesElemVec& s1, sesElemVec& s2) const { + if (!s2.empty()) { + for (sesElemVec_iter vit=s2.begin();vit!=s2.end();++vit) { + s1.push_back(*vit); + } + } + } + + /** + * check if the sequences have been swapped + */ + bool inline wasSwapped () const { + return swapped; + } + + }; +} + +#endif // DTL_DIFF_H diff --git a/dtl/Diff3.hpp b/dtl/Diff3.hpp new file mode 100644 index 0000000..854864e --- /dev/null +++ b/dtl/Diff3.hpp @@ -0,0 +1,245 @@ +/** + dtl -- Diff Template Library + + In short, Diff Template Library is distributed under so called "BSD license", + + Copyright (c) 2015 Tatsuhiko Kubo + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of the authors nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* If you use this library, you must include dtl.hpp only. */ + +#ifndef DTL_DIFF3_H +#define DTL_DIFF3_H + +namespace dtl { + + /** + * diff3 class template + * sequence must support random_access_iterator. + */ + template , typename comparator = Compare< elem > > + class Diff3 + { + private: + dtl_typedefs(elem, sequence) + sequence A; + sequence B; + sequence C; + sequence S; + Diff< elem, sequence, comparator > diff_ba; + Diff< elem, sequence, comparator > diff_bc; + bool conflict; + elem csepabegin; + elem csepa; + elem csepaend; + public : + Diff3 () {} + Diff3 (const sequence& a, + const sequence& b, + const sequence& c) : A(a), B(b), C(c), + diff_ba(b, a), diff_bc(b, c), + conflict(false) {} + + ~Diff3 () {} + + bool isConflict () const { + return conflict; + } + + sequence getMergedSequence () const { + return S; + } + + /** + * merge changes B and C into A + */ + bool merge () { + if (diff_ba.getEditDistance() == 0) { // A == B + if (diff_bc.getEditDistance() == 0) { // A == B == C + S = B; + return true; + } + S = C; + return true; + } else { // A != B + if (diff_bc.getEditDistance() == 0) { // A != B == C + S = A; + return true; + } else { // A != B != C + S = merge_(); + if 
(isConflict()) { // conflict occured + return false; + } + } + } + return true; + } + + /** + * compose differences + */ + void compose () { + diff_ba.compose(); + diff_bc.compose(); + } + + private : + /** + * merge implementation + */ + sequence merge_ () { + elemVec seq; + Ses< elem > ses_ba = diff_ba.getSes(); + Ses< elem > ses_bc = diff_bc.getSes(); + sesElemVec ses_ba_v = ses_ba.getSequence(); + sesElemVec ses_bc_v = ses_bc.getSequence(); + sesElemVec_iter ba_it = ses_ba_v.begin(); + sesElemVec_iter bc_it = ses_bc_v.begin(); + sesElemVec_iter ba_end = ses_ba_v.end(); + sesElemVec_iter bc_end = ses_bc_v.end(); + + while (!isEnd(ba_end, ba_it) || !isEnd(bc_end, bc_it)) { + while (true) { + if (!isEnd(ba_end, ba_it) && + !isEnd(bc_end, bc_it) && + ba_it->first == bc_it->first && + ba_it->second.type == SES_COMMON && + bc_it->second.type == SES_COMMON) { + // do nothing + } else { + break; + } + if (!isEnd(ba_end, ba_it)) seq.push_back(ba_it->first); + else if (!isEnd(bc_end, bc_it)) seq.push_back(bc_it->first); + forwardUntilEnd(ba_end, ba_it); + forwardUntilEnd(bc_end, bc_it); + } + if (isEnd(ba_end, ba_it) || isEnd(bc_end, bc_it)) break; + if ( ba_it->second.type == SES_COMMON + && bc_it->second.type == SES_DELETE) { + forwardUntilEnd(ba_end, ba_it); + forwardUntilEnd(bc_end, bc_it); + } else if (ba_it->second.type == SES_COMMON && + bc_it->second.type == SES_ADD) { + seq.push_back(bc_it->first); + forwardUntilEnd(bc_end, bc_it); + } else if (ba_it->second.type == SES_DELETE && + bc_it->second.type == SES_COMMON) { + forwardUntilEnd(ba_end, ba_it); + forwardUntilEnd(bc_end, bc_it); + } else if (ba_it->second.type == SES_DELETE && + bc_it->second.type == SES_DELETE) { + if (ba_it->first == bc_it->first) { + forwardUntilEnd(ba_end, ba_it); + forwardUntilEnd(bc_end, bc_it); + } else { + // conflict + conflict = true; + return B; + } + } else if (ba_it->second.type == SES_DELETE && + bc_it->second.type == SES_ADD) { + // conflict + conflict = true; + return B; + } 
else if (ba_it->second.type == SES_ADD && + bc_it->second.type == SES_COMMON) { + seq.push_back(ba_it->first); + forwardUntilEnd(ba_end, ba_it); + } else if (ba_it->second.type == SES_ADD && + bc_it->second.type == SES_DELETE) { + // conflict + conflict = true; + return B; + } else if (ba_it->second.type == SES_ADD && + bc_it->second.type == SES_ADD) { + if (ba_it->first == bc_it->first) { + seq.push_back(ba_it->first); + forwardUntilEnd(ba_end, ba_it); + forwardUntilEnd(bc_end, bc_it); + } else { + // conflict + conflict = true; + return B; + } + } + } + + if (isEnd(ba_end, ba_it)) { + addDecentSequence(bc_end, bc_it, seq); + } else if (isEnd(bc_end, bc_it)) { + addDecentSequence(ba_end, ba_it, seq); + } + + sequence mergedSeq(seq.begin(), seq.end()); + return mergedSeq; + } + + /** + * join elem vectors + */ + void inline joinElemVec (elemVec& s1, elemVec& s2) const { + if (!s2.empty()) { + for (elemVec_iter vit=s2.begin();vit!=s2.end();++vit) { + s1.push_back(*vit); + } + } + } + + /** + * check if sequence is at end + */ + template + bool inline isEnd (const T_iter& end, const T_iter& it) const { + return it == end ? true : false; + } + + /** + * increment iterator until iterator is at end + */ + template + void inline forwardUntilEnd (const T_iter& end, T_iter& it) const { + if (!isEnd(end, it)) ++it; + } + + /** + * add elements whose SES's type is ADD + */ + void inline addDecentSequence (const sesElemVec_iter& end, sesElemVec_iter& it, elemVec& seq) const { + while (!isEnd(end, it)) { + if (it->second.type == SES_ADD) seq.push_back(it->first); + ++it; + } + } + + }; +} + +#endif // DTL_DIFF3_H diff --git a/dtl/Lcs.hpp b/dtl/Lcs.hpp new file mode 100644 index 0000000..e47c238 --- /dev/null +++ b/dtl/Lcs.hpp @@ -0,0 +1,55 @@ +/** + dtl -- Diff Template Library + + In short, Diff Template Library is distributed under so called "BSD license", + + Copyright (c) 2015 Tatsuhiko Kubo + All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of the authors nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* If you use this library, you must include dtl.hpp only. 
*/ + +#ifndef DTL_LCS_H +#define DTL_LCS_H + +namespace dtl { + + /** + * Longest Common Subsequence template class + */ + template + class Lcs : public Sequence< elem > + { + public : + Lcs () {} + ~Lcs () {} + }; +} + +#endif // DTL_LCS_H diff --git a/dtl/Sequence.hpp b/dtl/Sequence.hpp new file mode 100644 index 0000000..eeab0ed --- /dev/null +++ b/dtl/Sequence.hpp @@ -0,0 +1,65 @@ +/** + dtl -- Diff Template Library + + In short, Diff Template Library is distributed under so called "BSD license", + + Copyright (c) 2015 Tatsuhiko Kubo + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of the authors nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
#ifndef DTL_SEQUENCE_H
#define DTL_SEQUENCE_H

namespace dtl {

    /**
     * sequence class template
     *
     * Thin wrapper around a vector of elements that grows by appending.
     * Serves as the base class of Lcs and Ses.
     */
    template <typename elem>
    class Sequence
    {
    public :
        typedef std::vector< elem > elemVec;

        Sequence () {}
        virtual ~Sequence () {}

        /**
         * @return a copy of the stored elements, in insertion order.
         *         Returned by value, so later addSequence() calls do not
         *         affect a vector obtained earlier.
         */
        elemVec getSequence () const {
            return sequence;
        }

        /**
         * Append one element.
         *
         * @param e element to append; taken by const reference so the only
         *          copy made is the one stored in the vector
         */
        void addSequence (const elem& e) {
            sequence.push_back(e);
        }
    protected :
        elemVec sequence; // stored elements, oldest first
    };
}

#endif // DTL_SEQUENCE_H
+ + * Neither the name of the authors nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* If you use this library, you must include dtl.hpp only. 
*/ + +#ifndef DTL_SES_H +#define DTL_SES_H + +namespace dtl { + + /** + * Shortest Edit Script template class + */ + template + class Ses : public Sequence< elem > + { + private : + typedef pair< elem, elemInfo > sesElem; + typedef vector< sesElem > sesElemVec; + public : + + Ses () : onlyAdd(true), onlyDelete(true), onlyCopy(true), deletesFirst(false) { + nextDeleteIdx = 0; + } + Ses (bool moveDel) : onlyAdd(true), onlyDelete(true), onlyCopy(true), deletesFirst(moveDel) { + nextDeleteIdx = 0; + } + ~Ses () {} + + bool isOnlyAdd () const { + return onlyAdd; + } + + bool isOnlyDelete () const { + return onlyDelete; + } + + bool isOnlyCopy () const { + return onlyCopy; + } + + bool isOnlyOneOperation () const { + return isOnlyAdd() || isOnlyDelete() || isOnlyCopy(); + } + + bool isChange () const { + return !onlyCopy; + } + + using Sequence< elem >::addSequence; + void addSequence (elem e, long long beforeIdx, long long afterIdx, const edit_t type) { + elemInfo info; + info.beforeIdx = beforeIdx; + info.afterIdx = afterIdx; + info.type = type; + sesElem pe(e, info); + if (!deletesFirst) { + sequence.push_back(pe); + } + switch (type) { + case SES_DELETE: + onlyCopy = false; + onlyAdd = false; + if (deletesFirst) { + sequence.insert(sequence.begin() + nextDeleteIdx, pe); + nextDeleteIdx++; + } + break; + case SES_COMMON: + onlyAdd = false; + onlyDelete = false; + if (deletesFirst) { + sequence.push_back(pe); + nextDeleteIdx = sequence.size(); + } + break; + case SES_ADD: + onlyDelete = false; + onlyCopy = false; + if (deletesFirst) { + sequence.push_back(pe); + } + break; + } + } + + sesElemVec getSequence () const { + return sequence; + } + private : + sesElemVec sequence; + bool onlyAdd; + bool onlyDelete; + bool onlyCopy; + bool deletesFirst; + size_t nextDeleteIdx; + }; +} + +#endif // DTL_SES_H diff --git a/dtl/dtl.hpp b/dtl/dtl.hpp new file mode 100644 index 0000000..b6231ba --- /dev/null +++ b/dtl/dtl.hpp @@ -0,0 +1,47 @@ +/** + dtl -- Diff Template Library + + 
In short, Diff Template Library is distributed under so called "BSD license", + + Copyright (c) 2015 Tatsuhiko Kubo + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of the authors nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef DTL_H +#define DTL_H + +#include "variables.hpp" +#include "functors.hpp" +#include "Sequence.hpp" +#include "Lcs.hpp" +#include "Ses.hpp" +#include "Diff.hpp" +#include "Diff3.hpp" + +#endif // DTL_H diff --git a/dtl/functors.hpp b/dtl/functors.hpp new file mode 100644 index 0000000..a4da5e5 --- /dev/null +++ b/dtl/functors.hpp @@ -0,0 +1,137 @@ +/** + dtl -- Diff Template Library + + In short, Diff Template Library is distributed under so called "BSD license", + + Copyright (c) 2015 Tatsuhiko Kubo + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of the authors nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* If you use this library, you must include dtl.hpp only. */ + +#ifndef DTL_FUNCTORS_H +#define DTL_FUNCTORS_H + +namespace dtl { + + /** + * printer class template + */ + template + class Printer + { + public : + Printer () : out_(cout) {} + Printer (stream& out) : out_(out) {} + virtual ~Printer () {} + virtual void operator() (const sesElem& se) const = 0; + protected : + stream& out_; + }; + + /** + * common element printer class template + */ + template + class CommonPrinter : public Printer < sesElem, stream > + { + public : + CommonPrinter () : Printer < sesElem, stream > () {} + CommonPrinter (stream& out) : Printer < sesElem, stream > (out) {} + ~CommonPrinter () {} + void operator() (const sesElem& se) const { + this->out_ << SES_MARK_COMMON << se.first << endl; + } + }; + + /** + * ses element printer class template + */ + template + class ChangePrinter : public Printer < sesElem, stream > + { + public : + ChangePrinter () : Printer < sesElem, stream > () {} + ChangePrinter (stream& out) : Printer < sesElem, stream > (out) {} + ~ChangePrinter () {} + void operator() (const sesElem& se) const { + switch (se.second.type) { + case SES_ADD: + this->out_ << SES_MARK_ADD << se.first << endl; + break; + case SES_DELETE: + this->out_ << SES_MARK_DELETE << se.first << endl; + break; + case SES_COMMON: + this->out_ << SES_MARK_COMMON << se.first << endl; + break; + } + } + }; + + /** + * unified format element printer class template + */ 
+ template + class UniHunkPrinter + { + public : + UniHunkPrinter () : out_(cout) {} + UniHunkPrinter (stream& out) : out_(out) {} + ~UniHunkPrinter () {} + void operator() (const uniHunk< sesElem >& hunk) const { + out_ << "@@" + << " -" << hunk.a << "," << hunk.b + << " +" << hunk.c << "," << hunk.d + << " @@" << endl; + + for_each(hunk.common[0].begin(), hunk.common[0].end(), CommonPrinter< sesElem, stream >(out_)); + for_each(hunk.change.begin(), hunk.change.end(), ChangePrinter< sesElem, stream >(out_)); + for_each(hunk.common[1].begin(), hunk.common[1].end(), CommonPrinter< sesElem, stream >(out_)); + } + private : + stream& out_; + }; + + /** + * compare class template + */ + template + class Compare + { + public : + Compare () {} + virtual ~Compare () {} + virtual inline bool impl (const elem& e1, const elem& e2) const { + return e1 == e2; + } + }; +} + +#endif // DTL_FUNCTORS_H diff --git a/dtl/variables.hpp b/dtl/variables.hpp new file mode 100644 index 0000000..cbf5a54 --- /dev/null +++ b/dtl/variables.hpp @@ -0,0 +1,142 @@ +/** + dtl -- Diff Template Library + + In short, Diff Template Library is distributed under so called "BSD license", + + Copyright (c) 2015 Tatsuhiko Kubo + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of the authors nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* If you use this library, you must include dtl.hpp only. */ + +#ifndef DTL_VARIABLES_H +#define DTL_VARIABLES_H + +#include +#include +#include +#include +#include + +namespace dtl { + + using std::vector; + using std::string; + using std::pair; + using std::ostream; + using std::list; + using std::for_each; + using std::distance; + using std::fill; + using std::cout; + using std::endl; + using std::rotate; + using std::swap; + using std::max; + + /** + * version string + */ + const string version = "1.19"; + + /** + * type of edit for SES + */ + typedef int edit_t; + const edit_t SES_DELETE = -1; + const edit_t SES_COMMON = 0; + const edit_t SES_ADD = 1; + + /** + * mark of SES + */ +#define SES_MARK_DELETE "-" +#define SES_MARK_COMMON " " +#define SES_MARK_ADD "+" + + /** + * info for Unified Format + */ + typedef struct eleminfo { + long long beforeIdx; // index of prev sequence + long long afterIdx; // index of after sequence + edit_t type; // type of edit(Add, Delete, Common) + bool operator==(const eleminfo& other) const{ + return (this->beforeIdx == other.beforeIdx && this->afterIdx == other.afterIdx && this->type == other.type); + } + } elemInfo; + + const long long DTL_SEPARATE_SIZE 
= 3; + const long long DTL_CONTEXT_SIZE = 3; + + /** + * coordinate for registering route + */ + typedef struct Point { + long long x; // x coordinate + long long y; // y coordinate + long long k; // vertex + } P; + + /** + * limit of coordinate size + */ + const unsigned long long MAX_CORDINATES_SIZE = 2000000; + + typedef vector< long long > editPath; + typedef vector< P > editPathCordinates; + + /** + * Structure of Unified Format Hunk + */ + template + struct uniHunk { + long long a, b, c, d; // @@ -a,b +c,d @@ + vector< sesElem > common[2]; // common elements before ([0]) and after ([1]) the changes + vector< sesElem > change; // changes + long long inc_dec_count; // count of increase and decrease + }; + +#define dtl_typedefs(elem, sequence) \ + typedef pair< elem, elemInfo > sesElem; \ + typedef vector< sesElem > sesElemVec; \ + typedef vector< uniHunk< sesElem > > uniHunkVec; \ + typedef list< elem > elemList; \ + typedef vector< elem > elemVec; \ + typedef typename uniHunkVec::iterator uniHunkVec_iter; \ + typedef typename sesElemVec::iterator sesElemVec_iter; \ + typedef typename elemList::iterator elemList_iter; \ + typedef typename sequence::iterator sequence_iter; \ + typedef typename sequence::const_iterator sequence_const_iter; \ + typedef typename elemVec::iterator elemVec_iter; + + +} + +#endif // DTL_VARIABLES_H diff --git a/examples/SConstruct b/examples/SConstruct new file mode 100644 index 0000000..07c4809 --- /dev/null +++ b/examples/SConstruct @@ -0,0 +1,42 @@ +import os + +# SConstruct for dtl examples + +env = Environment(CPPPATH='..') +debug = ARGUMENTS.get('debug', 'n') +if debug == 'y' or debug == 'yes': + env.Append(CPPFLAGS = ['-Wall', '-g']) +else: + env.Append(CPPFLAGS = ['-Wall', '-O2']) + +if os.sys.platform != "win32": + env.Append(CPPDEFINES = ['HAVE_UNISTD_H']) + +conf = Configure(env); + +if not conf.CheckCXX(): + print "The C++ compiler is not installed!"
+ Exit(1) + +libs = ['stdc++'] +for lib in libs: + if not conf.CheckLib(lib): + print "library " + lib + " not installed!" + Exit(1) + +conf.Finish() + +targets = { 'strdiff' : ['strdiff.cpp', 'common.cpp'], # diff between two string sequences + 'intdiff' : ['intdiff.cpp'], # diff between two integer sequences + 'unidiff' : ['unidiff.cpp', 'common.cpp'], # unified diff between two files + 'unistrdiff' : ['unistrdiff.cpp', 'common.cpp'], # unified diff between two strings + 'bdiff' : ['bdiff.cpp', 'common.cpp'], # diff between two byte sequences + 'strdiff3' : ['strdiff3.cpp', 'common.cpp'], # three-way string diff program using dtl + 'intdiff3' : ['intdiff3.cpp'], # three-way integer diff program using dtl + 'patch' : ['patch.cpp', 'common.cpp'], # string patch program using dtl + 'fpatch' : ['fpatch.cpp', 'common.cpp'], # file patch program using dtl + 'st2ses' : ['st2ses.cpp', 'common.cpp'], # convert SES format file to SES instance + 'strdiff_cp' : ['strdiff_cp.cpp', 'common.cpp'], # diff between two string sequences with custom printer + } + +[ env.Program(target, targets[target]) for target in targets ] diff --git a/examples/bdiff.cpp b/examples/bdiff.cpp new file mode 100644 index 0000000..5108bd2 --- /dev/null +++ b/examples/bdiff.cpp @@ -0,0 +1,73 @@ + +#include +#include "common.hpp" + +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif // HAVE_UNISTD_H + +using namespace std; + +using dtl::Diff; + +typedef unsigned char elem; +typedef vector< elem > sequence; + +static int create_byte_seq(const char *fs, sequence& seq); +static int create_byte_seq(const char *fs, sequence& seq) +{ + int fd; + int siz; + elem buf[BUFSIZ]; + if ((fd = open(fs, O_RDONLY)) == -1) { + cout << "Opening failed." 
<< endl; + return -1; + } + while ((siz = read(fd, buf, sizeof(buf))) > 0) { + for (int i=0;i d(seq1, seq2); + d.compose(); + + if (d.getEditDistance() == 0) { + cout << fs1 << " is the same as " << fs2 << endl; + } else { + cout << fs1 << " is different from " << fs2 << endl; + } + + return 0; +} diff --git a/examples/common.cpp b/examples/common.cpp new file mode 100644 index 0000000..cd2a9b6 --- /dev/null +++ b/examples/common.cpp @@ -0,0 +1,20 @@ + +#include "common.hpp" + +bool isFileExist (string& fs) { + FILE *fp; + if ((fp = fopen(fs.c_str(), "r")) == NULL) { + return false; + } + fclose(fp); + return true; +} + +bool isFewArgs (int argc, int limit) { + bool ret = false; + if (argc < limit) { + ret = true; + } + return ret; +} + diff --git a/examples/common.hpp b/examples/common.hpp new file mode 100644 index 0000000..7159841 --- /dev/null +++ b/examples/common.hpp @@ -0,0 +1,13 @@ + +#ifndef DTL_EXAMPLE_COMMON_H +#define DTL_EXAMPLE_COMMON_H + +#include +#include + +using namespace std; + +bool isFileExist (string& fs); +bool isFewArgs (int argc, int limit = 3); + +#endif diff --git a/examples/fpatch.cpp b/examples/fpatch.cpp new file mode 100644 index 0000000..eef90de --- /dev/null +++ b/examples/fpatch.cpp @@ -0,0 +1,73 @@ + +#include +#include "common.hpp" +#include +#include +#include +#include +#include + +using namespace std; + +using dtl::Diff; + +int main(int argc, char *argv[]){ + + if (isFewArgs(argc)) { + cerr << "Too few arguments." 
<< endl; + return -1; + } + + string A(argv[1]); + string B(argv[2]); + bool fileExist = true; + + if (!isFileExist(A)) { + cerr << "file A does not exist" << endl; + fileExist = false; + } + + if (!isFileExist(B)) { + cerr << "file B does not exist" << endl; + fileExist = false; + } + + if (!fileExist) { + return -1; + } + + typedef string elem; + typedef vector< elem > sequence; + + ifstream Aifs(A.c_str()); + ifstream Bifs(B.c_str()); + elem buf; + sequence ALines, BLines; + ostringstream ossLine, ossInfo; + + while(getline(Aifs, buf)){ + ALines.push_back(buf); + } + while(getline(Bifs, buf)){ + BLines.push_back(buf); + } + + Diff< elem > d(ALines, BLines); + d.compose(); + + sequence s1 = ALines; + sequence s2 = d.patch(s1); + + // fpatch + assert(BLines == s2); + cout << "fpatch succeeded" << endl; + + d.composeUnifiedHunks(); + sequence s3 = d.uniPatch(s1); + + // unipatch + assert(BLines == s3); + cout << "unipatch succeeded" << endl; + + return 0; +} diff --git a/examples/intdiff.cpp b/examples/intdiff.cpp new file mode 100644 index 0000000..4478065 --- /dev/null +++ b/examples/intdiff.cpp @@ -0,0 +1,49 @@ + +#include +#include +#include + +using namespace std; + +using dtl::Diff; + +int main(int, char**){ + + int a[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + int b[] = {3, 5, 1, 4, 5, 1, 7, 9, 6, 10}; + int asiz = sizeof(a) / sizeof(int); + int bsiz = sizeof(b) / sizeof(int); + for (int i=0;i sequence; + + sequence A(&a[0], &a[asiz]); + sequence B(&b[0], &b[bsiz]); + Diff< elem > d(A, B); + d.compose(); + + // editDistance + cout << "editDistance:" << d.getEditDistance() << endl; + + // Longest Common Subsequence + sequence lcs_v = d.getLcsVec(); + cout << "LCS: "; + for (sequence::iterator vit=lcs_v.begin();vit!=lcs_v.end();++vit) { + cout << *vit << " "; + } + cout << endl; + + // Shortest Edit Script + cout << "SES" << endl; + d.printSES(); + + return 0; +} diff --git a/examples/intdiff3.cpp b/examples/intdiff3.cpp new file mode 100644 index 0000000..3c215fc 
--- /dev/null +++ b/examples/intdiff3.cpp @@ -0,0 +1,57 @@ + +#include +#include +#include +#include + +using namespace std; + +using dtl::Diff3; + +int main(int, char**) { + + int a[10] = {1, 2, 3, 4, 5, 6, 7, 3, 9, 10}; + int b[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + int c[10] = {1, 2, 3, 9, 5, 6, 7, 8, 9, 10}; + int answer[10] = {1, 2, 3, 9, 5, 6, 7, 3, 9, 10}; + + cout << "a:"; + for (int i=0;i<10;++i) { + cout << a[i] << " "; + } + cout << endl; + cout << "b:"; + for (int i=0;i<10;++i) { + cout << b[i] << " "; + } + cout << endl; + cout << "c:"; + for (int i=0;i<10;++i) { + cout << c[i] << " "; + } + cout << endl; + + typedef int elem; + typedef vector< int > sequence; + sequence A(&a[0], &a[10]); + sequence B(&b[0], &b[10]); + sequence C(&c[0], &c[10]); + sequence Answer(&answer[0], &answer[10]); + Diff3< elem > diff3(A, B, C); + diff3.compose(); + if (!diff3.merge()) { + cerr << "conflict." << endl; + return -1; + } + sequence s = diff3.getMergedSequence(); + cout << "s:"; + for (sequence::iterator it=s.begin();it!=s.end();++it) { + cout << *it << " "; + } + cout << endl; + + assert(s == Answer); + cout << "intdiff3 OK" << endl; + + return 0; +} diff --git a/examples/patch.cpp b/examples/patch.cpp new file mode 100644 index 0000000..909ed11 --- /dev/null +++ b/examples/patch.cpp @@ -0,0 +1,44 @@ + +#include +#include "common.hpp" +#include +#include +#include + +using namespace std; + +using dtl::Diff; + +int main(int argc, char *argv[]) { + + if (isFewArgs(argc)) { + cerr << "Too few arguments." 
<< endl; + return -1; + } + + typedef char elem; + typedef string sequence; + + sequence A(argv[1]); + sequence B(argv[2]); + + Diff< elem, sequence > d(A, B); + d.compose(); + + sequence s1(A); + sequence s2 = d.patch(s1); + d.composeUnifiedHunks(); + sequence s3 = d.uniPatch(s1); + + cout << "before:" << s1 << endl; + cout << "after :" << s2 << endl; + assert(B == s2); + cout << "patch succeeded" << endl; + + cout << "before:" << s1 << endl; + cout << "after :" << s3 << endl; + assert(B == s3); + cout << "unipatch succeeded" << endl; + + return 0; +} diff --git a/examples/printers.hpp b/examples/printers.hpp new file mode 100644 index 0000000..82da6ca --- /dev/null +++ b/examples/printers.hpp @@ -0,0 +1,26 @@ +#ifndef DTL_PRINTERS +#define DTL_PRINTERS + +template +class customChangePrinter : public Printer < sesElem, stream > +{ +public : + customChangePrinter () : Printer < sesElem, stream > () {} + customChangePrinter (stream& out) : Printer < sesElem, stream > (out) {} + ~customChangePrinter () {} + void operator() (const sesElem& se) const { + switch (se.second.type) { + case SES_ADD: + this->out_ << "Add: " << se.first << endl; + break; + case SES_DELETE: + this->out_ << "Delete: " << se.first << endl; + break; + case SES_COMMON: + this->out_ << "Common: " << se.first << endl; + break; + } + } +}; + +#endif // DTL_PRINTERS diff --git a/examples/st2ses.cpp b/examples/st2ses.cpp new file mode 100644 index 0000000..1c1b88b --- /dev/null +++ b/examples/st2ses.cpp @@ -0,0 +1,35 @@ + +#include +#include "common.hpp" +#include +#include +#include +#include +#include + +using namespace std; +using namespace dtl; + +int main(int argc, char *argv[]){ + + if (isFewArgs(argc, 2)) { + cerr << "Too few arguments." << endl; + return -1; + } + + typedef string elem; + typedef vector< string > sequence; + + string s(argv[1]); + + if (!isFileExist(s)) { + cerr << s << " is invalid." 
<< endl; + return -1; + } + + ifstream fs(s.c_str()); + const Ses< elem > ses = Diff< elem, sequence >::composeSesFromStream< ifstream >(fs); + dtl::Diff< elem, sequence >::printSES(ses); + + return 0; +} diff --git a/examples/strdiff.cpp b/examples/strdiff.cpp new file mode 100644 index 0000000..d5abc6a --- /dev/null +++ b/examples/strdiff.cpp @@ -0,0 +1,42 @@ + +#include +#include "common.hpp" +#include +#include +#include + +using namespace std; + +using dtl::Diff; + +int main(int argc, char *argv[]){ + + if (isFewArgs(argc)) { + cerr << "Too few arguments." << endl; + return -1; + } + + typedef char elem; + typedef string sequence; + + sequence A(argv[1]); + sequence B(argv[2]); + + Diff< elem, sequence > d(A, B); + //d.onOnlyEditDistance(); + d.compose(); + + // editDistance + cout << "editDistance:" << d.getEditDistance() << endl; + + // Longest Common Subsequence + vector< elem > lcs_v = d.getLcsVec(); + sequence lcs_s(lcs_v.begin(), lcs_v.end()); + cout << "LCS:" << lcs_s << endl; + + // Shortest Edit Script + cout << "SES" << endl; + d.printSES(); + + return 0; +} diff --git a/examples/strdiff3.cpp b/examples/strdiff3.cpp new file mode 100644 index 0000000..048f37b --- /dev/null +++ b/examples/strdiff3.cpp @@ -0,0 +1,35 @@ + +#include +#include "common.hpp" +#include +#include +#include + +using namespace std; + +using dtl::Diff3; + +int main(int argc, char *argv[]){ + + if (isFewArgs(argc, 4)) { + cerr << "Too few arguments." << endl; + return -1; + } + + typedef char elem; + typedef string sequence; + + sequence A(argv[1]); + sequence B(argv[2]); + sequence C(argv[3]); + + Diff3< elem, sequence > diff3(A, B, C); + diff3.compose(); + if (!diff3.merge()) { + cerr << "conflict." 
<< endl; + return 0; + } + cout << "result:" << diff3.getMergedSequence() << endl; + + return 0; +} diff --git a/examples/strdiff_cp.cpp b/examples/strdiff_cp.cpp new file mode 100644 index 0000000..17c146c --- /dev/null +++ b/examples/strdiff_cp.cpp @@ -0,0 +1,40 @@ + +#include +#include "common.hpp" +#include +#include +#include + +using namespace std; + +using dtl::Diff; +using dtl::SES_ADD; +using dtl::SES_DELETE; +using dtl::SES_COMMON; +using dtl::Printer; + +#include "printers.hpp" + +int main(int argc, char *argv[]){ + + if (isFewArgs(argc)) { + cerr << "Too few arguments." << endl; + return -1; + } + + typedef char elem; + typedef string sequence; + + sequence A(argv[1]); + sequence B(argv[2]); + + Diff< elem, sequence > d(A, B); + d.compose(); + + // Shortest Edit Script + cout << "SES" << endl; + + d.printSES < ostream, customChangePrinter > (cout); + + return 0; +} diff --git a/examples/unidiff.cpp b/examples/unidiff.cpp new file mode 100644 index 0000000..fe1ae92 --- /dev/null +++ b/examples/unidiff.cpp @@ -0,0 +1,111 @@ + +#include +#include "common.hpp" +#include +#include +#include +#include + +#include +#include + +using namespace std; + +using dtl::Diff; +using dtl::elemInfo; +using dtl::uniHunk; + +static void showStats (string fp1, string fp2); +static void unifiedDiff (string fp1, string fp2); + +static void showStats (string fp1, string fp2) +{ + const int MAX_LENGTH = 255; + char time_format[] = "%Y-%m-%d %H:%M:%S %z"; + time_t rawtime[2]; + struct tm *timeinfo[2]; + struct stat st[2]; + + if (stat(fp1.c_str(), &st[0]) == -1) { + cerr << "argv1 is invalid." 
<< endl; + exit(-1); + } + if (stat(fp2.c_str(), &st[1]) == -1) { + cerr << "argv2 is invalid" << endl; + exit(-1); + } + + char buf[2][MAX_LENGTH + 1]; + rawtime[0] = st[0].st_mtime; + timeinfo[0] = localtime(&rawtime[0]); + strftime(buf[0], MAX_LENGTH, time_format, timeinfo[0]); + cout << "--- " << fp1 << '\t' << buf[0] << endl; + rawtime[1] = st[1].st_mtime; + timeinfo[1] = localtime(&rawtime[1]); + strftime(buf[1], MAX_LENGTH, time_format, timeinfo[1]); + cout << "+++ " << fp2 << '\t' << buf[1] << endl; +} + +static void unifiedDiff (string fp1, string fp2) +{ + typedef string elem; + typedef vector< elem > sequence; + typedef pair< elem, elemInfo > sesElem; + + ifstream Aifs(fp1.c_str()); + ifstream Bifs(fp2.c_str()); + elem buf; + sequence ALines, BLines; + + while(getline(Aifs, buf)){ + ALines.push_back(buf); + } + while(getline(Bifs, buf)){ + BLines.push_back(buf); + } + + Diff< elem > diff(ALines, BLines); + diff.onHuge(); + //diff.onUnserious(); + diff.compose(); + + // type unihunk definition test + uniHunk< sesElem > hunk; + + if (diff.getEditDistance() > 0) { + showStats(fp1, fp2); // show file info + } + + diff.composeUnifiedHunks(); + diff.printUnifiedFormat(); +} + + +int main(int argc, char *argv[]) +{ + if (isFewArgs(argc)) { + cerr << "Too few arguments." << endl; + return -1; + } + + string s1(argv[1]); + string s2(argv[2]); + bool fileExist = true; + + if (!isFileExist(s1)) { + cerr << s1 << " is invalid." << endl; + fileExist = false; + } + + if (!isFileExist(s2)) { + cerr << s2 << " is invalid." 
<< endl; + fileExist = false; + } + + if (!fileExist) { + return -1; + } + + unifiedDiff(s1, s2); + return 0; +} diff --git a/examples/unistrdiff.cpp b/examples/unistrdiff.cpp new file mode 100644 index 0000000..66726a5 --- /dev/null +++ b/examples/unistrdiff.cpp @@ -0,0 +1,40 @@ + +#include +#include "common.hpp" +#include +#include + +using namespace std; + +using dtl::Diff; + +int main(int argc, char *argv[]){ + + if (isFewArgs(argc)) { + cerr << "Too few arguments." << endl; + return -1; + } + + typedef char elem; + typedef string sequence; + + sequence A(argv[1]); + sequence B(argv[2]); + + Diff d(A, B); + d.compose(); + d.composeUnifiedHunks(); + + // editDistance + cout << "editDistance:" << d.getEditDistance() << endl; + + // Longest Common Subsequence + vector lcs_v = d.getLcsVec(); + sequence lcs_s(lcs_v.begin(), lcs_v.end()); + cout << "LCS:" << lcs_s << endl; + + // print Unified Format + d.printUnifiedFormat(); + + return 0; +} diff --git a/test/Intdifftest.cpp b/test/Intdifftest.cpp new file mode 100644 index 0000000..301ca76 --- /dev/null +++ b/test/Intdifftest.cpp @@ -0,0 +1,144 @@ +#include "dtl_test_common.hpp" + +class Intdifftest : public ::testing::Test +{ +protected : + dtl_test_typedefs(int, vector< int >) + typedef struct case_t { + sequence A; + sequence B; + size_t editdis; + elemVec lcs_v; + sequence lcs_s; + sesElemVec ses_seq; + uniHunkVec hunk_v; + size_t editdis_ses; + size_t editdis_uni; + string path_rses; + string path_rhunks; + } case_t; + typedef vector< case_t > caseVec; + caseVec cases; + + case_t createCase (const sequence a, const sequence b, string test_name) { + case_t c; + string diff_name("intdiff"); + Diff< elem > diff(a, b); + diff.compose(); + diff.composeUnifiedHunks(); + + if (test_name != "") { + string path_lses = create_path(test_name, diff_name, TYPE_DIFF_SES); + string path_rses = create_path(test_name, diff_name, TYPE_DIFF_SES, true); + string path_lhunks = create_path(test_name, diff_name, TYPE_DIFF_UNI); + 
string path_rhunks = create_path(test_name, diff_name, TYPE_DIFF_UNI, true); + + create_file< elem, sequence, Compare< elem > >(path_rses, diff, TYPE_DIFF_SES); + create_file< elem, sequence, Compare< elem > >(path_rhunks, diff, TYPE_DIFF_UNI); + c.editdis_ses = cal_diff_uni(path_lses, path_rses); + c.editdis_uni = cal_diff_uni(path_lhunks, path_rhunks); + c.path_rses = path_rses; + c.path_rhunks = path_rhunks; + } + + + c.A = a; + c.B = b; + c.editdis = diff.getEditDistance(); + c.lcs_v = diff.getLcsVec(); + c.ses_seq = diff.getSes().getSequence(); + return c; + } + + void SetUp() { + cases.push_back(createCase(sequence(0), sequence(0), "diff_test0")); + sequence B1; + B1.push_back(1); + cases.push_back(createCase(sequence(0), B1, "diff_test1")); + sequence A2; + A2.push_back(1); + cases.push_back(createCase(A2, sequence(0), "diff_test2")); + int a4[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + int b4[] = {3, 5, 1, 4, 5, 1, 7, 9, 6, 10}; + int a4siz = sizeof(a4) / sizeof(int); + int b4siz = sizeof(b4) / sizeof(int); + sequence A4(&a4[0], &a4[a4siz]); + sequence B4(&b4[0], &b4[b4siz]); + cases.push_back(createCase(A4, B4, "diff_test3")); + int a5[] = {1, 2, 3, 4, 5}; + int b5[] = {3, 5, 1, 4, 5}; + int a5siz = sizeof(a5) / sizeof(int); + int b5siz = sizeof(b5) / sizeof(int); + sequence A5(&a5[0], &a5[a5siz]); + sequence B5(&b5[0], &b5[b5siz]); + cases.push_back(createCase(A5, B5, "diff_test4")); + } + + void TearDown () { + for_each(cases.begin(), cases.end(), Remover< case_t >()); + } + +}; + +/** + * Intdifftest + * check list is following + * - editdistance + * - LCS + * - SES + */ +TEST_F (Intdifftest, diff_test0) { + EXPECT_EQ(0, cases[0].editdis); + + EXPECT_TRUE(cases[0].lcs_v.empty()); + + ASSERT_EQ(0, cases[0].editdis_ses); + + ASSERT_EQ(0, cases[0].editdis_uni); +} + +TEST_F (Intdifftest, diff_test1) { + EXPECT_EQ(1, cases[1].editdis); + + EXPECT_TRUE(cases[1].lcs_v.empty()); + + ASSERT_EQ(0, cases[1].editdis_ses); + + ASSERT_EQ(0, cases[1].editdis_uni); +} + 
+TEST_F (Intdifftest, diff_test2) { + EXPECT_EQ(1, cases[2].editdis); + + EXPECT_TRUE(cases[2].lcs_v.empty()); + + ASSERT_EQ(0, cases[2].editdis_ses); + + ASSERT_EQ(0, cases[2].editdis_uni); +} + +TEST_F (Intdifftest, diff_test3) { + EXPECT_EQ(8, cases[3].editdis); + + EXPECT_EQ(3, cases[3].lcs_v[0]); + EXPECT_EQ(4, cases[3].lcs_v[1]); + EXPECT_EQ(5, cases[3].lcs_v[2]); + EXPECT_EQ(7, cases[3].lcs_v[3]); + EXPECT_EQ(9, cases[3].lcs_v[4]); + + ASSERT_EQ(0, cases[3].editdis_ses); + + ASSERT_EQ(0, cases[3].editdis_uni); +} + +TEST_F (Intdifftest, diff_test4) { + EXPECT_EQ(4, cases[4].editdis); + + EXPECT_EQ(3, cases[4].lcs_v[0]); + EXPECT_EQ(4, cases[4].lcs_v[1]); + EXPECT_EQ(5, cases[4].lcs_v[2]); + + ASSERT_EQ(0, cases[4].editdis_ses); + + ASSERT_EQ(0, cases[4].editdis_uni); +} diff --git a/test/Objdifftest.cpp b/test/Objdifftest.cpp new file mode 100644 index 0000000..2a729dc --- /dev/null +++ b/test/Objdifftest.cpp @@ -0,0 +1,116 @@ +#include "dtl_test_common.hpp" +#include "comparators.hpp" + +class Objdifftest : public ::testing::Test +{ +protected : + dtl_test_typedefs(string, vector) + typedef struct case_t { + sequence A; + sequence B; + sesElemVec expected; + sesElemVec ses_seq; + } case_t; + typedef vector< case_t > caseVec; + + caseVec obj_diff_cases; + + template < typename comparator > + case_t createCase (const sequence a, const sequence b, sesElemVec ses, string test_name) { + case_t c; + elemVec lcs_v; + string diff_name("objdiff"); + + Diff< elem, sequence, comparator > diff(a, b, true); + + diff.compose(); + + c.A = a; + c.B = b; + c.ses_seq = diff.getSes().getSequence(); + c.expected = ses; + + return c; + } + + void SetUp(void) { + { + string array1[] = {"the", "quick", "brown"}; + string array2[] = {"The", "Quick", "Fox"}; + + sequence A(array1, array1 + (sizeof(array1) / sizeof(array1[0]))); + sequence B(array2, array2 + (sizeof(array2) / sizeof(array2[0]))); + + dtl::Ses< elem > ses; + ses.addSequence("the", 1, 1, dtl::SES_COMMON); + 
ses.addSequence("quick", 2, 2, dtl::SES_COMMON); + ses.addSequence("brown", 3, 0, dtl::SES_DELETE); + ses.addSequence("Fox", 0, 3, dtl::SES_ADD); + + obj_diff_cases.push_back(createCase< StringCaseInsensitive >(A, B, ses.getSequence(), "objdiff_test0_pattern")); + } + + { + string array1[] = {"b", "c", "e", "g"}; + string array2[] = {"a", "d", "e", "f", "h"}; + + sequence A(array1, array1 + (sizeof(array1) / sizeof(array1[0]))); + sequence B(array2, array2 + (sizeof(array2) / sizeof(array2[0]))); + + dtl::Ses< elem > ses; + ses.addSequence("b", 1, 0, dtl::SES_DELETE); + ses.addSequence("c", 2, 0, dtl::SES_DELETE); + ses.addSequence("a", 0, 1, dtl::SES_ADD); + ses.addSequence("d", 0, 2, dtl::SES_ADD); + ses.addSequence("e", 3, 3, dtl::SES_COMMON); + ses.addSequence("g", 4, 0, dtl::SES_DELETE); + ses.addSequence("f", 0, 4, dtl::SES_ADD); + ses.addSequence("h", 0, 5, dtl::SES_ADD); + + obj_diff_cases.push_back(createCase< StringCaseInsensitive >(A, B, ses.getSequence(), "objdiff_test1_unswapped")); + } + { + string array1[] = {"a", "d", "e", "f", "h"}; + string array2[] = {"b", "c", "e", "g"}; + + sequence A(array1, array1 + (sizeof(array1) / sizeof(array1[0]))); + sequence B(array2, array2 + (sizeof(array2) / sizeof(array2[0]))); + + dtl::Ses< elem > ses; + ses.addSequence("a", 1, 0, dtl::SES_DELETE); + ses.addSequence("d", 2, 0, dtl::SES_DELETE); + ses.addSequence("b", 0, 1, dtl::SES_ADD); + ses.addSequence("c", 0, 2, dtl::SES_ADD); + ses.addSequence("e", 3, 3, dtl::SES_COMMON); + ses.addSequence("f", 4, 0, dtl::SES_DELETE); + ses.addSequence("h", 5, 0, dtl::SES_DELETE); + ses.addSequence("g", 0, 4, dtl::SES_ADD); + + obj_diff_cases.push_back(createCase< StringCaseInsensitive >(A, B, ses.getSequence(), "objdiff_test2_swapped")); + } + } + + void TearDown () { + //for_each(obj_diff_cases.begin(), obj_diff_cases.end(), Remover< case_t >()); + } +}; + + +/** + * Objdifftest + * check list: + * - SES pattern "SES_COMMON, SES_DELETE, SES_ADD" + * - Independence of results
from swapping + */ + +TEST_F (Objdifftest, objdiff_test0_pattern) { + EXPECT_EQ(obj_diff_cases[0].expected, obj_diff_cases[0].ses_seq); +} + +TEST_F (Objdifftest, objdiff_test1_unswapped) { + EXPECT_EQ(obj_diff_cases[1].expected, obj_diff_cases[1].ses_seq); +} + +TEST_F (Objdifftest, objdiff_test2_swapped) { + EXPECT_EQ(obj_diff_cases[2].expected, obj_diff_cases[2].ses_seq); +} diff --git a/test/Patchtest.cpp b/test/Patchtest.cpp new file mode 100644 index 0000000..44c10f5 --- /dev/null +++ b/test/Patchtest.cpp @@ -0,0 +1,98 @@ +#include "dtl_test_common.hpp" + +class Patchtest : public ::testing::Test +{ +protected : + dtl_test_typedefs(char, string) + typedef struct case_t { + sequence A; + sequence B; + Diff< elem, sequence > diff; + } case_t; + typedef vector< case_t > caseVec; + + caseVec cases; + + case_t createCase (sequence a, sequence b) { + case_t c; + c.A = a; + c.B = b; + c.diff = Diff< elem, sequence >(a, b); + c.diff.compose(); + c.diff.composeUnifiedHunks(); + return c; + } + + void SetUp() { + cases.push_back(createCase("abc", "abd")); // 0 + cases.push_back(createCase("acbdeacbed", "acebdabbabed")); // 1 + cases.push_back(createCase("abcdef", "dacfea")); // 2 + cases.push_back(createCase("abcbda", "bdcaba")); // 3 + cases.push_back(createCase("bokko", "bokkko")); // 4 + cases.push_back(createCase("", "")); // 5 + cases.push_back(createCase("a", "")); // 6 + cases.push_back(createCase("", "b")); // 7 + cases.push_back(createCase("abcdefq3wefarhgorequgho4euhfteowauhfwehogfewrquhoi23hroewhoahfotrhguoiewahrgqqabcdef", + "3abcdef4976fd86ouofita67t85r876e5e746578tgliuhopoqqabcdef")); // 8 + cases.push_back(createCase("abcqqqeqqqccc", "abdqqqeqqqddd")); // 9 + } + + void TearDown () {} + +}; + +/** + * Patchtest + * check list is following + * - patch function + * - uniPatch function + */ +TEST_F (Patchtest, patch_test0) { + ASSERT_EQ(cases[0].B, cases[0].diff.patch(cases[0].A)); + ASSERT_EQ(cases[0].B, cases[0].diff.uniPatch(cases[0].A)); +} + +TEST_F 
(Patchtest, patch_test1) { + ASSERT_EQ(cases[1].B, cases[1].diff.patch(cases[1].A)); + ASSERT_EQ(cases[1].B, cases[1].diff.uniPatch(cases[1].A)); +} + +TEST_F (Patchtest, patch_test2) { + ASSERT_EQ(cases[2].B, cases[2].diff.patch(cases[2].A)); + ASSERT_EQ(cases[2].B, cases[2].diff.uniPatch(cases[2].A)); +} + +TEST_F (Patchtest, patch_test3) { + ASSERT_EQ(cases[3].B, cases[3].diff.patch(cases[3].A)); + ASSERT_EQ(cases[3].B, cases[3].diff.uniPatch(cases[3].A)); +} + +TEST_F (Patchtest, patch_test4) { + ASSERT_EQ(cases[4].B, cases[4].diff.patch(cases[4].A)); + ASSERT_EQ(cases[4].B, cases[4].diff.uniPatch(cases[4].A)); +} + +TEST_F (Patchtest, patch_test5) { + ASSERT_EQ(cases[5].B, cases[5].diff.patch(cases[5].A)); + ASSERT_EQ(cases[5].B, cases[5].diff.uniPatch(cases[5].A)); +} + +TEST_F (Patchtest, patch_test6) { + ASSERT_EQ(cases[6].B, cases[6].diff.patch(cases[6].A)); + ASSERT_EQ(cases[6].B, cases[6].diff.uniPatch(cases[6].A)); +} + +TEST_F (Patchtest, patch_test7) { + ASSERT_EQ(cases[7].B, cases[7].diff.patch(cases[7].A)); + ASSERT_EQ(cases[7].B, cases[7].diff.uniPatch(cases[7].A)); +} + +TEST_F (Patchtest, patch_test8) { + ASSERT_EQ(cases[8].B, cases[8].diff.patch(cases[8].A)); + ASSERT_EQ(cases[8].B, cases[8].diff.uniPatch(cases[8].A)); +} + +TEST_F (Patchtest, patch_test9) { + ASSERT_EQ(cases[9].B, cases[9].diff.patch(cases[9].A)); + ASSERT_EQ(cases[9].B, cases[9].diff.uniPatch(cases[9].A)); +} diff --git a/test/SConstruct b/test/SConstruct new file mode 100644 index 0000000..d5ced04 --- /dev/null +++ b/test/SConstruct @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +# SConstruct for dtl test + +import os + +def path_chomp(path): + if path[-1] == '/': + return path[:-1] + return path + +if not 'GTEST_ROOT' in os.environ: + print "set environment variable 'GTEST_ROOT'" + Exit(1) + +gtest_root = path_chomp(os.environ['GTEST_ROOT']) +if gtest_root[-1] == '/': + gtest_root = gtest_root[:-1] + +gtest_includes = gtest_root + '/include' +gtest_libs = gtest_root +
'/lib/.libs' + +flags = ['-Wall', '-O2'] +libs = ['stdc++', 'pthread'] +includes = ['..', gtest_includes] +target = 'dtl_test' + +env = Environment(CPPFLAGS=flags, + CPPPATH=includes, + LIBPATH=[gtest_libs], + ) + +conf = Configure(env); + +if not conf.CheckCXX(): + print "c++ compiler is not installed!" + Exit(1) + +for lib in libs: + if not conf.CheckLib(lib): + print "library " + lib + " not installed!" + Exit(1) + +conf.Finish() + +test = env.Program(target, + [Glob('*.cpp'), gtest_libs + '/libgtest.a'] + ) + +test_alias = env.Alias('check', + test, + test[0].abspath) + +env.AlwaysBuild(test_alias) diff --git a/test/Strdiff3test.cpp b/test/Strdiff3test.cpp new file mode 100644 index 0000000..0517815 --- /dev/null +++ b/test/Strdiff3test.cpp @@ -0,0 +1,208 @@ +#include "dtl_test_common.hpp" +#include "comparators.hpp" + +class Strdiff3test : public ::testing::Test +{ +protected : + dtl_test_typedefs(char, string) + typedef struct case_t { + sequence S; + bool is_merge_success; + sequence merged_seq; + } case_t; + typedef vector< case_t > caseVec; + + caseVec merge_cases; + caseVec detect_cases; + caseVec custom_cases; + + template < typename comparator > + case_t createCase (sequence a, sequence b, sequence c, sequence s) { + Diff3< elem, sequence, comparator > diff3(a, b, c); + case_t ct; + + diff3.compose(); + + ct.S = s; + ct.is_merge_success = diff3.merge(); + ct.merged_seq = diff3.getMergedSequence(); + return ct; + } + + void SetUp() { + // merge test + merge_cases.push_back(createCase< Compare < elem > >("ab", "b", "bc", "abc")); // 0 + merge_cases.push_back(createCase< Compare < elem > >("bc", "b", "ab", "abc")); // 1 + merge_cases.push_back(createCase< Compare < elem > >("qqqabc", "abc", "abcdef", "qqqabcdef")); // 2 + merge_cases.push_back(createCase< Compare < elem > >("abcdef", "abc", "qqqabc", "qqqabcdef")); // 3 + merge_cases.push_back(createCase< Compare < elem > >("aaacccbbb", "aaabbb", "aaabbbqqq", "aaacccbbbqqq")); // 4 + 
merge_cases.push_back(createCase< Compare < elem > >("aaabbbqqq", "aaabbb", "aaacccbbb", "aaacccbbbqqq")); // 5 + merge_cases.push_back(createCase< Compare < elem > >("aeaacccbbb", "aaabbb", "aaabbbqqq", "aeaacccbbbqqq")); // 6 + merge_cases.push_back(createCase< Compare < elem > >("aaabbbqqq", "aaabbb", "aeaacccbbb", "aeaacccbbbqqq")); // 7 + merge_cases.push_back(createCase< Compare < elem > >("aeaacccbbb", "aaabbb", "aaabebbqqq", "aeaacccbebbqqq")); // 8 + merge_cases.push_back(createCase< Compare < elem > >("aaabebbqqq", "aaabbb", "aeaacccbbb", "aeaacccbebbqqq")); // 9 + merge_cases.push_back(createCase< Compare < elem > >("aaacccbbb", "aaabbb", "aeaabbbqqq", "aeaacccbbbqqq")); // 10 + merge_cases.push_back(createCase< Compare < elem > >("aeaabbbqqq", "aaabbb", "aaacccbbb", "aeaacccbbbqqq")); // 11 + merge_cases.push_back(createCase< Compare < elem > >("aaacccbbb", "aaabbb", "aaabeebbeeqqq","aaacccbeebbeeqqq")); // 12 + merge_cases.push_back(createCase< Compare < elem > >("aaabeebbeeqqq", "aaabbb", "aaacccbbb", "aaacccbeebbeeqqq")); // 13 + merge_cases.push_back(createCase< Compare < elem > >("aiueo", "aeo", "aeKokaki", "aiueKokaki")); // 14 + merge_cases.push_back(createCase< Compare < elem > >("aeKokaki", "aeo", "aiueo", "aiueKokaki")); // 15 + merge_cases.push_back(createCase< Compare < elem > >("1234567390", "1234567890", "1239567890", "1239567390")); // 16 + merge_cases.push_back(createCase< Compare < elem > >("1239567890", "1234567890", "1234567390", "1239567390")); // 17 + merge_cases.push_back(createCase< Compare < elem > >("qabcdef", "abcdef", "ab", "qab")); // 18 + merge_cases.push_back(createCase< Compare < elem > >("ab", "abcdef", "qabcdef", "qab")); // 19 + merge_cases.push_back(createCase< Compare < elem > >("abcdf", "abcdef", "acdef", "acdf")); // 20 + merge_cases.push_back(createCase< Compare < elem > >("acdef", "abcdef", "abcdf", "acdf")); // 21 + merge_cases.push_back(createCase< Compare < elem > >("acdef", "abcdef", "abcdfaa", "acdfaa")); // 
22 + merge_cases.push_back(createCase< Compare < elem > >("abcdfaa", "abcdef", "acdef", "acdfaa")); // 23 + + // detect confliction test + detect_cases.push_back(createCase< Compare < elem > >("adc", "abc", "aec", "")); // 0 + detect_cases.push_back(createCase< Compare < elem > >("abqdcf", "abcdef", "abqqef", "")); // 1 + + // use custom comparator + custom_cases.push_back(createCase< CaseInsensitive >("abc", "abc", "abC", "abc")); + } + + void TearDown () {} + +}; + +/** + * Strdiff3test + * check list is following + * - merge function + * - detect confliction + */ +TEST_F (Strdiff3test, merge_test0) { + ASSERT_TRUE(merge_cases[0].is_merge_success); + ASSERT_EQ(merge_cases[0].S, merge_cases[0].merged_seq); +} + +TEST_F (Strdiff3test, merge_test1) { + ASSERT_TRUE(merge_cases[1].is_merge_success); + ASSERT_EQ(merge_cases[1].S, merge_cases[1].merged_seq); +} + +TEST_F (Strdiff3test, merge_test2) { + ASSERT_TRUE(merge_cases[2].is_merge_success); + ASSERT_EQ(merge_cases[2].S, merge_cases[2].merged_seq); +} + +TEST_F (Strdiff3test, merge_test3) { + ASSERT_TRUE(merge_cases[3].is_merge_success); + ASSERT_EQ(merge_cases[3].S, merge_cases[3].merged_seq); +} + +TEST_F (Strdiff3test, merge_test4) { + ASSERT_TRUE(merge_cases[4].is_merge_success); + ASSERT_EQ(merge_cases[4].S, merge_cases[4].merged_seq); +} + +TEST_F (Strdiff3test, merge_test5) { + ASSERT_TRUE(merge_cases[5].is_merge_success); + ASSERT_EQ(merge_cases[5].S, merge_cases[5].merged_seq); +} + +TEST_F (Strdiff3test, merge_test6) { + ASSERT_TRUE(merge_cases[6].is_merge_success); + ASSERT_EQ(merge_cases[6].S, merge_cases[6].merged_seq); +} + +TEST_F (Strdiff3test, merge_test7) { + ASSERT_TRUE(merge_cases[7].is_merge_success); + ASSERT_EQ(merge_cases[7].S, merge_cases[7].merged_seq); +} + +TEST_F (Strdiff3test, merge_test8) { + ASSERT_TRUE(merge_cases[8].is_merge_success); + ASSERT_EQ(merge_cases[8].S, merge_cases[8].merged_seq); +} + +TEST_F (Strdiff3test, merge_test9) { + ASSERT_TRUE(merge_cases[9].is_merge_success); 
+ ASSERT_EQ(merge_cases[9].S, merge_cases[9].merged_seq); +} + +TEST_F (Strdiff3test, merge_test10) { + ASSERT_TRUE(merge_cases[10].is_merge_success); + ASSERT_EQ(merge_cases[10].S, merge_cases[10].merged_seq); +} + +TEST_F (Strdiff3test, merge_test11) { + ASSERT_TRUE(merge_cases[11].is_merge_success); + ASSERT_EQ(merge_cases[11].S, merge_cases[11].merged_seq); +} + +TEST_F (Strdiff3test, merge_test12) { + ASSERT_TRUE(merge_cases[12].is_merge_success); + ASSERT_EQ(merge_cases[12].S, merge_cases[12].merged_seq); +} + +TEST_F (Strdiff3test, merge_test13) { + ASSERT_TRUE(merge_cases[13].is_merge_success); + ASSERT_EQ(merge_cases[13].S, merge_cases[13].merged_seq); +} + +TEST_F (Strdiff3test, merge_test14) { + ASSERT_TRUE(merge_cases[14].is_merge_success); + ASSERT_EQ(merge_cases[14].S, merge_cases[14].merged_seq); +} + +TEST_F (Strdiff3test, merge_test15) { + ASSERT_TRUE(merge_cases[15].is_merge_success); + ASSERT_EQ(merge_cases[15].S, merge_cases[15].merged_seq); +} + +TEST_F (Strdiff3test, merge_test16) { + ASSERT_TRUE(merge_cases[16].is_merge_success); + ASSERT_EQ(merge_cases[16].S, merge_cases[16].merged_seq); +} + +TEST_F (Strdiff3test, merge_test17) { + ASSERT_TRUE(merge_cases[17].is_merge_success); + ASSERT_EQ(merge_cases[17].S, merge_cases[17].merged_seq); +} + +TEST_F (Strdiff3test, merge_test18) { + ASSERT_TRUE(merge_cases[18].is_merge_success); + ASSERT_EQ(merge_cases[18].S, merge_cases[18].merged_seq); +} + +TEST_F (Strdiff3test, merge_test19) { + ASSERT_TRUE(merge_cases[19].is_merge_success); + ASSERT_EQ(merge_cases[19].S, merge_cases[19].merged_seq); +} + +TEST_F (Strdiff3test, merge_test20) { + ASSERT_TRUE(merge_cases[20].is_merge_success); + ASSERT_EQ(merge_cases[20].S, merge_cases[20].merged_seq); +} + +TEST_F (Strdiff3test, merge_test21) { + ASSERT_TRUE(merge_cases[21].is_merge_success); + ASSERT_EQ( merge_cases[21].S, merge_cases[21].merged_seq); +} + +TEST_F (Strdiff3test, merge_test22) { + ASSERT_TRUE(merge_cases[22].is_merge_success); + 
ASSERT_EQ( merge_cases[22].S, merge_cases[22].merged_seq); +} + +TEST_F (Strdiff3test, merge_test23) { + ASSERT_TRUE(merge_cases[23].is_merge_success); + ASSERT_EQ(merge_cases[23].S, merge_cases[23].merged_seq); +} + +TEST_F (Strdiff3test, detect_confliction_test0) { + ASSERT_FALSE(detect_cases[0].is_merge_success); +} + +TEST_F (Strdiff3test, detect_confliction_test1) { + ASSERT_FALSE(detect_cases[1].is_merge_success); +} + +TEST_F (Strdiff3test, custom_comparator_test0) { + ASSERT_TRUE(custom_cases[0].is_merge_success); + ASSERT_EQ(custom_cases[0].S, custom_cases[0].merged_seq); +} diff --git a/test/Strdifftest.cpp b/test/Strdifftest.cpp new file mode 100644 index 0000000..99ca0a8 --- /dev/null +++ b/test/Strdifftest.cpp @@ -0,0 +1,299 @@ +#include "dtl_test_common.hpp" +#include "comparators.hpp" + +class Strdifftest : public ::testing::Test +{ +protected : + dtl_test_typedefs(char, string) + typedef struct case_t { + sequence A; + sequence B; + size_t editdis; + elemVec lcs_v; + sequence lcs_s; + sesElemVec ses_seq; + uniHunkVec hunk_v; + size_t editdis_ses; + size_t editdis_uni; + string path_rses; + string path_rhunks; + } case_t; + typedef vector< case_t > caseVec; + + caseVec diff_cases; + caseVec only_editdis_cases; + caseVec custom_cases; + + template < typename comparator > + case_t createCase (const sequence a, const sequence b, string test_name, bool onlyEditdis = false) { + case_t c; + elemVec lcs_v; + string diff_name("strdiff"); + + Diff< elem, sequence, comparator > diff(a, b); + if (onlyEditdis) { + diff.onOnlyEditDistance(); + } + + diff.compose(); + diff.composeUnifiedHunks(); + lcs_v = diff.getLcsVec(); + + if (test_name != "") { + string path_lses = create_path(test_name, diff_name, TYPE_DIFF_SES); + string path_rses = create_path(test_name, diff_name, TYPE_DIFF_SES, true); + string path_lhunks = create_path(test_name, diff_name, TYPE_DIFF_UNI); + string path_rhunks = create_path(test_name, diff_name, TYPE_DIFF_UNI, true); + 
diff_resultset_exist_check(path_lses); + diff_resultset_exist_check(path_lhunks); + + create_file< elem, sequence, comparator >(path_rses, diff, TYPE_DIFF_SES); + create_file< elem, sequence, comparator >(path_rhunks, diff, TYPE_DIFF_UNI); + + c.editdis_ses = cal_diff_uni(path_lses, path_rses); + c.editdis_uni = cal_diff_uni(path_lhunks, path_rhunks); + c.path_rses = path_rses; + c.path_rhunks = path_rhunks; + } + + c.A = a; + c.B = b; + c.editdis = diff.getEditDistance(); + c.lcs_s = sequence(lcs_v.begin(), lcs_v.end()); + c.ses_seq = diff.getSes().getSequence(); + c.hunk_v = diff.getUniHunks(); + + return c; + } + + void SetUp(void) { + diff_cases.push_back(createCase< Compare< elem > >("abc", "abd", "diff_test0")); + diff_cases.push_back(createCase< Compare< elem > >("acbdeacbed", "acebdabbabed", "diff_test1")); + diff_cases.push_back(createCase< Compare< elem > >("abcdef", "dacfea", "diff_test2")); + diff_cases.push_back(createCase< Compare< elem > >("abcbda", "bdcaba", "diff_test3")); + diff_cases.push_back(createCase< Compare< elem > >("bokko", "bokkko", "diff_test4")); + diff_cases.push_back(createCase< Compare< elem > >("", "", "diff_test5")); + diff_cases.push_back(createCase< Compare< elem > >("a", "", "diff_test6")); + diff_cases.push_back(createCase< Compare< elem > >("", "b", "diff_test7")); + diff_cases.push_back(createCase< Compare< elem > >("acbdeaqqqqqqqcbed", "acebdabbqqqqqqqabed", "diff_test8")); + + only_editdis_cases.push_back(createCase< Compare< elem > >("abc", "abd", "", true)); + only_editdis_cases.push_back(createCase< Compare< elem > >("acbdeacbed", "acebdabbabed", "", true)); + only_editdis_cases.push_back(createCase< Compare< elem > >("abcdef", "dacfea", "", true)); + only_editdis_cases.push_back(createCase< Compare< elem > >("abcbda", "bdcaba", "", true)); + only_editdis_cases.push_back(createCase< Compare< elem > >("bokko", "bokkko", "", true)); + only_editdis_cases.push_back(createCase< Compare< elem > >("", "", "", true)); + 
only_editdis_cases.push_back(createCase< Compare< elem > >("a", "", "", true)); + only_editdis_cases.push_back(createCase< Compare< elem > >("", "b", "", true)); + only_editdis_cases.push_back(createCase< Compare< elem > >("acbdeaqqqqqqqcbed", "acebdabbqqqqqqqabed", "", true)); + + custom_cases.push_back(createCase< CaseInsensitive >("abc", "Abc", "custom_test0")); + } + + void TearDown () { + for_each(diff_cases.begin(), diff_cases.end(), Remover< case_t >()); + for_each(custom_cases.begin(), custom_cases.end(), Remover< case_t >()); + } +}; + + +/** + * Strdifftest + * check list is following + * - editdistance + * - LCS + * - SES + * - Unified Format Difference + * - onOnlyEditDistance + */ + +TEST_F (Strdifftest, diff_test0) { + + EXPECT_EQ(2, diff_cases[0].editdis); + + EXPECT_EQ("ab", diff_cases[0].lcs_s); + + ASSERT_EQ(0, diff_cases[0].editdis_ses); + + ASSERT_EQ(0, diff_cases[0].editdis_uni); +} + +TEST_F (Strdifftest, diff_test1) { + EXPECT_EQ(6, diff_cases[1].editdis); + + EXPECT_EQ("acbdabed", diff_cases[1].lcs_s); + + ASSERT_EQ(0, diff_cases[1].editdis_ses); + + ASSERT_EQ(0, diff_cases[1].editdis_uni); +} + +TEST_F (Strdifftest, diff_test2) { + EXPECT_EQ(6, diff_cases[2].editdis); + + EXPECT_EQ("acf", diff_cases[2].lcs_s); + + ASSERT_EQ(0, diff_cases[2].editdis_ses); + + ASSERT_EQ(0, diff_cases[2].editdis_uni); +} + +TEST_F (Strdifftest, diff_test3) { + EXPECT_EQ(4, diff_cases[3].editdis); + + EXPECT_EQ("bcba", diff_cases[3].lcs_s); + + ASSERT_EQ(0, diff_cases[3].editdis_ses); + + ASSERT_EQ(0, diff_cases[3].editdis_uni); +} + +TEST_F (Strdifftest, diff_test4) { + EXPECT_EQ(1, diff_cases[4].editdis); + + EXPECT_EQ("bokko", diff_cases[4].lcs_s); + + ASSERT_EQ(0, diff_cases[4].editdis_ses); + + ASSERT_EQ(0, diff_cases[4].editdis_uni); +} + +TEST_F (Strdifftest, diff_test5) { + EXPECT_EQ(0, diff_cases[5].editdis); + + EXPECT_EQ("", diff_cases[5].lcs_s); + + ASSERT_EQ(0, diff_cases[5].editdis_ses); + + ASSERT_EQ(0, diff_cases[5].editdis_uni); +} + +TEST_F 
(Strdifftest, diff_test6) { + EXPECT_EQ(1, diff_cases[6].editdis); + + EXPECT_EQ("", diff_cases[6].lcs_s); + + ASSERT_EQ(0, diff_cases[6].editdis_ses); + + ASSERT_EQ(0, diff_cases[6].editdis_uni); +} + +TEST_F (Strdifftest, diff_test7) { + EXPECT_EQ(1, diff_cases[7].editdis); + + EXPECT_EQ("", diff_cases[7].lcs_s); + + ASSERT_EQ(0, diff_cases[7].editdis_ses); + + ASSERT_EQ(0, diff_cases[7].editdis_uni); +} + +TEST_F (Strdifftest, diff_test8) { + EXPECT_EQ(6, diff_cases[8].editdis); + + EXPECT_EQ("acbdaqqqqqqqbed", diff_cases[8].lcs_s); + + ASSERT_EQ(0, diff_cases[8].editdis_ses); + + ASSERT_EQ(0, diff_cases[8].editdis_uni); +} + +TEST_F (Strdifftest, only_editdis_test0) { + EXPECT_EQ(2, only_editdis_cases[0].editdis); + + EXPECT_EQ("", only_editdis_cases[0].lcs_s); + + ASSERT_TRUE(only_editdis_cases[0].ses_seq.empty()); + + ASSERT_TRUE(only_editdis_cases[0].hunk_v.empty()); +} + +TEST_F (Strdifftest, only_editdis_test1) { + EXPECT_EQ(6, only_editdis_cases[1].editdis); + + EXPECT_EQ("", only_editdis_cases[1].lcs_s); + + ASSERT_TRUE(only_editdis_cases[1].ses_seq.empty()); + + ASSERT_TRUE(only_editdis_cases[1].hunk_v.empty()); +} + +TEST_F (Strdifftest, only_editdis_test2) { + EXPECT_EQ(6, only_editdis_cases[2].editdis); + + EXPECT_EQ("", only_editdis_cases[2].lcs_s); + + ASSERT_TRUE(only_editdis_cases[2].ses_seq.empty()); + + ASSERT_TRUE(only_editdis_cases[2].hunk_v.empty()); +} + +TEST_F (Strdifftest, only_editdis_test3) { + EXPECT_EQ(4, only_editdis_cases[3].editdis); + + EXPECT_EQ("", only_editdis_cases[3].lcs_s); + + ASSERT_TRUE(only_editdis_cases[3].ses_seq.empty()); + + ASSERT_TRUE(only_editdis_cases[3].hunk_v.empty()); +} + +TEST_F (Strdifftest, only_editdis_test4) { + EXPECT_EQ(1, only_editdis_cases[4].editdis); + + EXPECT_EQ("", only_editdis_cases[4].lcs_s); + + ASSERT_TRUE(only_editdis_cases[4].ses_seq.empty()); + + ASSERT_TRUE(only_editdis_cases[4].hunk_v.empty()); +} + +TEST_F (Strdifftest, only_editdis_test5) { + EXPECT_EQ(0, 
only_editdis_cases[5].editdis); + + EXPECT_EQ("", only_editdis_cases[5].lcs_s); + + ASSERT_TRUE(only_editdis_cases[5].ses_seq.empty()); + + ASSERT_TRUE(only_editdis_cases[5].hunk_v.empty()); +} + +TEST_F (Strdifftest, only_editdis_test6) { + EXPECT_EQ(1, only_editdis_cases[6].editdis); + + EXPECT_EQ("", only_editdis_cases[6].lcs_s); + + ASSERT_TRUE(only_editdis_cases[6].ses_seq.empty()); + + ASSERT_TRUE(only_editdis_cases[6].hunk_v.empty()); +} + +TEST_F (Strdifftest, only_editdis_test7) { + EXPECT_EQ(1, only_editdis_cases[7].editdis); + + EXPECT_EQ("", only_editdis_cases[7].lcs_s); + + ASSERT_TRUE(only_editdis_cases[7].ses_seq.empty()); + + ASSERT_TRUE(only_editdis_cases[7].hunk_v.empty()); +} + +TEST_F (Strdifftest, only_editdis_test8) { + EXPECT_EQ(6, only_editdis_cases[8].editdis); + + EXPECT_EQ("", only_editdis_cases[8].lcs_s); + + ASSERT_TRUE(only_editdis_cases[8].ses_seq.empty()); + + ASSERT_TRUE(only_editdis_cases[8].hunk_v.empty()); +} + +TEST_F (Strdifftest, custom_comparator_test0) { + EXPECT_EQ(0, custom_cases[0].editdis); + + EXPECT_EQ("abc", custom_cases[0].lcs_s); + + ASSERT_EQ(0, custom_cases[0].editdis_ses); + + ASSERT_TRUE(custom_cases[0].hunk_v.empty()); +} diff --git a/test/comparators.hpp b/test/comparators.hpp new file mode 100644 index 0000000..bc13b0f --- /dev/null +++ b/test/comparators.hpp @@ -0,0 +1,24 @@ +#ifndef DTL_COMPARATORS +#define DTL_COMPARATORS + +class CaseInsensitive: public dtl::Compare { +public: + virtual bool impl(const char& a, const char& b) const { + return tolower(a) == tolower(b); + } +}; + +class StringCaseInsensitive: public dtl::Compare { +public: + virtual bool impl(const string& a, const string& b) const { + if (a.length() == b.length()) { + bool equal = (strncasecmp(a.c_str(), b.c_str(), a.length()) == 0); + return equal; + } + else { + return false; + } + } +}; + +#endif // DTL_COMPARATORS diff --git a/test/dtl_test.cpp b/test/dtl_test.cpp new file mode 100644 index 0000000..d2c20b4 --- /dev/null +++ 
b/test/dtl_test.cpp
@@ -0,0 +1,10 @@
+/**
+ * It is necessary to use googletest to run tests.
+ */
+
+#include <gtest/gtest.h>
+
+int main (int argc, char *argv[]) {
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
diff --git a/test/dtl_test_common.cpp b/test/dtl_test_common.cpp
new file mode 100644
index 0000000..f727a1f
--- /dev/null
+++ b/test/dtl_test_common.cpp
@@ -0,0 +1,87 @@
+
+#include "dtl_test_common.hpp"
+
+#include <cstdlib>
+
+/**
+ * Build the path of a diff result set below the current working directory:
+ *   <cwd>/ses/<diff_name>/<test_name>    for TYPE_DIFF_SES
+ *   <cwd>/hunks/<diff_name>/<test_name>  for TYPE_DIFF_UNI
+ * "_" is appended when is_use_suffix is true.
+ */
+string create_path (const string& test_name, string diff_name, enum type_diff t, bool is_use_suffix) {
+    string ret;
+    const char *kind_dir = NULL;
+    switch (t) {
+    case TYPE_DIFF_SES:
+        kind_dir = "ses";
+        break;
+    case TYPE_DIFF_UNI:
+        kind_dir = "hunks";
+        break;
+    }
+    if (kind_dir != NULL) {
+        // getcwd(NULL, 0) returns a malloc()ed buffer owned by the caller, so it
+        // is checked for failure and released once copied into the result.
+        char *cwd = getcwd(NULL, 0);
+        if (cwd == NULL) {
+            cerr << "getcwd failed." << endl;
+            exit(EXIT_FAILURE);
+        }
+        ret = string(cwd) + string("/") + string(kind_dir) + string("/") + diff_name + string("/") + test_name;
+        free(cwd);
+    }
+    ret += is_use_suffix ? "_" : "";
+    return ret;
+}
+
+/**
+ * Read both files line by line and return the edit distance between the
+ * two line sequences.
+ */
+size_t cal_diff_uni (const string& path_l, const string& path_r) {
+    string buf;
+    ifstream lifs(path_l.c_str());
+    ifstream rifs(path_r.c_str());
+
+    vector< string > llines;
+    vector< string > rlines;
+
+    while (getline(lifs, buf)) {
+        llines.push_back(buf);
+    }
+
+    while (getline(rifs, buf)) {
+        rlines.push_back(buf);
+    }
+
+    Diff< string, vector< string > > diff_uni(llines, rlines);
+    diff_uni.compose();
+    return diff_uni.getEditDistance();
+}
+
+/**
+ * Return true when the file named by fs can be opened for reading.
+ */
+bool is_file_exist (string& fs) {
+    FILE *fp;
+    if ((fp = fopen(fs.c_str(), "r")) == NULL) {
+        return false;
+    }
+    fclose(fp);
+    return true;
+}
+
+/**
+ * Print an error and terminate with EXIT_FAILURE when the diff result set
+ * fs cannot be opened.
+ */
+void diff_resultset_exist_check (string &fs) {
+    if (!is_file_exist(fs)) {
+        cerr << "======================================================Error!!!======================================================" << endl;
+        cerr << "diff result set:" << fs << " is not found." << endl;
+        cerr << "======================================================Error!!!======================================================" << endl;
+        cerr << "excute dtl_test in dtl/test!" << endl;
+        exit(EXIT_FAILURE);
+    }
+}
diff --git a/test/dtl_test_common.hpp b/test/dtl_test_common.hpp
new file mode 100644
index 0000000..76ad921
--- /dev/null
+++ b/test/dtl_test_common.hpp
@@ -0,0 +1,76 @@
+
+#ifndef DTL_TEST_COMMON
+#define DTL_TEST_COMMON
+
+// NOTE(review): the include targets below were missing from this copy and are
+// reconstructed from the names this header uses -- confirm against upstream.
+#include <gtest/gtest.h>
+#include <cstdio>
+#include <string>
+#include <vector>
+#include <utility>
+#include <iostream>
+#include <fstream>
+#include <dtl/dtl.hpp>
+
+using std::cerr;
+using std::endl;
+using std::string;
+using std::vector;
+using std::pair;
+using std::ifstream;
+using std::ofstream;
+
+using dtl::Diff;
+using dtl::Diff3;
+using dtl::Compare;
+using dtl::SES_COMMON;
+using dtl::SES_ADD;
+using dtl::SES_DELETE;
+using dtl::elemInfo;
+using dtl::uniHunk;
+
+// Declares the element/sequence typedefs shared by the test fixtures.
+#define dtl_test_typedefs(e_type, seq_type) \
+    typedef e_type elem; \
+    typedef seq_type sequence; \
+    typedef pair< elem, elemInfo > sesElem; \
+    typedef vector< elem > elemVec; \
+    typedef vector< sesElem > sesElemVec; \
+    typedef vector< uniHunk< sesElem > > uniHunkVec;
+
+// Which kind of stored diff result set a path or file refers to.
+enum type_diff { TYPE_DIFF_SES, TYPE_DIFF_UNI };
+
+string create_path (const string& test_name, string diff_name, enum type_diff t, bool is_use_suffix = false);
+size_t cal_diff_uni (const string& path_l, const string& path_r);
+bool is_file_exist (string& fs);
+void diff_resultset_exist_check (string &fs);
+
+// Functor that removes the files named by v.path_rses and v.path_rhunks.
+template <typename T>
+class Remover {
+public :
+    void operator()(const T& v){
+        remove(v.path_rses.c_str());
+        remove(v.path_rhunks.c_str());
+    }
+};
+
+// Writes diff to the file at path, as SES or unified format depending on t.
+template < typename elem, typename sequence, typename comparator >
+void create_file (const string& path, Diff< elem, sequence, comparator >& diff, enum type_diff t) {
+    ofstream ofs;
+    ofs.open(path.c_str());
+    switch (t) {
+    case TYPE_DIFF_SES:
+        diff.printSES(ofs);
+        break;
+    case TYPE_DIFF_UNI:
+        diff.printUnifiedFormat(ofs);
+        break;
+    }
+    ofs.close();
+}
+
+#endif // DTL_TEST_COMMON
diff --git a/test/hunks/intdiff/diff_test0 b/test/hunks/intdiff/diff_test0
new file mode 100644
index 0000000..e69de29
diff --git a/test/hunks/intdiff/diff_test1 b/test/hunks/intdiff/diff_test1
new file mode 100644
index 0000000..760015a --- /dev/null +++ b/test/hunks/intdiff/diff_test1 @@ -0,0 +1,2 @@ +@@ -1,0 +1,1 @@ ++1 diff --git a/test/hunks/intdiff/diff_test2 b/test/hunks/intdiff/diff_test2 new file mode 100644 index 0000000..a17b64b --- /dev/null +++ b/test/hunks/intdiff/diff_test2 @@ -0,0 +1,2 @@ +@@ -1,1 +1,0 @@ +-1 diff --git a/test/hunks/intdiff/diff_test3 b/test/hunks/intdiff/diff_test3 new file mode 100644 index 0000000..22886b7 --- /dev/null +++ b/test/hunks/intdiff/diff_test3 @@ -0,0 +1,15 @@ +@@ -1,10 +1,10 @@ +-1 +-2 + 3 ++5 ++1 + 4 + 5 +-6 ++1 + 7 +-8 + 9 ++6 + 10 diff --git a/test/hunks/intdiff/diff_test4 b/test/hunks/intdiff/diff_test4 new file mode 100644 index 0000000..31a964d --- /dev/null +++ b/test/hunks/intdiff/diff_test4 @@ -0,0 +1,8 @@ +@@ -1,5 +1,5 @@ +-1 +-2 + 3 ++5 ++1 + 4 + 5 diff --git a/test/hunks/strdiff/custom_test0 b/test/hunks/strdiff/custom_test0 new file mode 100644 index 0000000..e69de29 diff --git a/test/hunks/strdiff/diff_test0 b/test/hunks/strdiff/diff_test0 new file mode 100644 index 0000000..d3a7667 --- /dev/null +++ b/test/hunks/strdiff/diff_test0 @@ -0,0 +1,5 @@ +@@ -1,3 +1,3 @@ + a + b +-c ++d diff --git a/test/hunks/strdiff/diff_test1 b/test/hunks/strdiff/diff_test1 new file mode 100644 index 0000000..f415172 --- /dev/null +++ b/test/hunks/strdiff/diff_test1 @@ -0,0 +1,15 @@ +@@ -1,10 +1,12 @@ + a + c ++e + b + d +-e + a +-c + b ++b ++a ++b + e + d diff --git a/test/hunks/strdiff/diff_test2 b/test/hunks/strdiff/diff_test2 new file mode 100644 index 0000000..f883521 --- /dev/null +++ b/test/hunks/strdiff/diff_test2 @@ -0,0 +1,10 @@ +@@ -1,6 +1,6 @@ ++d + a +-b + c +-d +-e + f ++e ++a diff --git a/test/hunks/strdiff/diff_test3 b/test/hunks/strdiff/diff_test3 new file mode 100644 index 0000000..a39089a --- /dev/null +++ b/test/hunks/strdiff/diff_test3 @@ -0,0 +1,9 @@ +@@ -1,6 +1,6 @@ +-a + b ++d + c ++a + b +-d + a diff --git a/test/hunks/strdiff/diff_test4 b/test/hunks/strdiff/diff_test4 new file mode 100644 index 
0000000..54610c6 --- /dev/null +++ b/test/hunks/strdiff/diff_test4 @@ -0,0 +1,6 @@ +@@ -2,4 +2,5 @@ + o + k + k ++k + o diff --git a/test/hunks/strdiff/diff_test5 b/test/hunks/strdiff/diff_test5 new file mode 100644 index 0000000..e69de29 diff --git a/test/hunks/strdiff/diff_test6 b/test/hunks/strdiff/diff_test6 new file mode 100644 index 0000000..4caf9eb --- /dev/null +++ b/test/hunks/strdiff/diff_test6 @@ -0,0 +1,2 @@ +@@ -1,1 +1,0 @@ +-a diff --git a/test/hunks/strdiff/diff_test7 b/test/hunks/strdiff/diff_test7 new file mode 100644 index 0000000..b4486fb --- /dev/null +++ b/test/hunks/strdiff/diff_test7 @@ -0,0 +1,2 @@ +@@ -1,0 +1,1 @@ ++b diff --git a/test/hunks/strdiff/diff_test8 b/test/hunks/strdiff/diff_test8 new file mode 100644 index 0000000..7bf08a9 --- /dev/null +++ b/test/hunks/strdiff/diff_test8 @@ -0,0 +1,22 @@ +@@ -1,9 +1,11 @@ + a + c ++e + b + d +-e + a ++b ++b + q + q + q +@@ -11,7 +13,7 @@ + q + q + q +-c ++a + b + e + d diff --git a/test/ses/intdiff/diff_test0 b/test/ses/intdiff/diff_test0 new file mode 100644 index 0000000..e69de29 diff --git a/test/ses/intdiff/diff_test1 b/test/ses/intdiff/diff_test1 new file mode 100644 index 0000000..556cf17 --- /dev/null +++ b/test/ses/intdiff/diff_test1 @@ -0,0 +1 @@ ++1 diff --git a/test/ses/intdiff/diff_test2 b/test/ses/intdiff/diff_test2 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/test/ses/intdiff/diff_test2 @@ -0,0 +1 @@ +-1 diff --git a/test/ses/intdiff/diff_test3 b/test/ses/intdiff/diff_test3 new file mode 100644 index 0000000..bbb8895 --- /dev/null +++ b/test/ses/intdiff/diff_test3 @@ -0,0 +1,14 @@ +-1 +-2 + 3 ++5 ++1 + 4 + 5 +-6 ++1 + 7 +-8 + 9 ++6 + 10 diff --git a/test/ses/intdiff/diff_test4 b/test/ses/intdiff/diff_test4 new file mode 100644 index 0000000..f0d4ce9 --- /dev/null +++ b/test/ses/intdiff/diff_test4 @@ -0,0 +1,7 @@ +-1 +-2 + 3 ++5 ++1 + 4 + 5 diff --git a/test/ses/strdiff/custom_test0 b/test/ses/strdiff/custom_test0 new file mode 100644 index 0000000..efe2cff --- 
/dev/null +++ b/test/ses/strdiff/custom_test0 @@ -0,0 +1,3 @@ + a + b + c diff --git a/test/ses/strdiff/diff_test0 b/test/ses/strdiff/diff_test0 new file mode 100644 index 0000000..831b035 --- /dev/null +++ b/test/ses/strdiff/diff_test0 @@ -0,0 +1,4 @@ + a + b +-c ++d diff --git a/test/ses/strdiff/diff_test1 b/test/ses/strdiff/diff_test1 new file mode 100644 index 0000000..2cacddb --- /dev/null +++ b/test/ses/strdiff/diff_test1 @@ -0,0 +1,14 @@ + a + c ++e + b + d +-e + a +-c + b ++b ++a ++b + e + d diff --git a/test/ses/strdiff/diff_test2 b/test/ses/strdiff/diff_test2 new file mode 100644 index 0000000..09c3c94 --- /dev/null +++ b/test/ses/strdiff/diff_test2 @@ -0,0 +1,9 @@ ++d + a +-b + c +-d +-e + f ++e ++a diff --git a/test/ses/strdiff/diff_test3 b/test/ses/strdiff/diff_test3 new file mode 100644 index 0000000..126966e --- /dev/null +++ b/test/ses/strdiff/diff_test3 @@ -0,0 +1,8 @@ +-a + b ++d + c ++a + b +-d + a diff --git a/test/ses/strdiff/diff_test4 b/test/ses/strdiff/diff_test4 new file mode 100644 index 0000000..f1fd383 --- /dev/null +++ b/test/ses/strdiff/diff_test4 @@ -0,0 +1,6 @@ + b + o + k + k ++k + o diff --git a/test/ses/strdiff/diff_test5 b/test/ses/strdiff/diff_test5 new file mode 100644 index 0000000..e69de29 diff --git a/test/ses/strdiff/diff_test6 b/test/ses/strdiff/diff_test6 new file mode 100644 index 0000000..66af866 --- /dev/null +++ b/test/ses/strdiff/diff_test6 @@ -0,0 +1 @@ +-a diff --git a/test/ses/strdiff/diff_test7 b/test/ses/strdiff/diff_test7 new file mode 100644 index 0000000..686db28 --- /dev/null +++ b/test/ses/strdiff/diff_test7 @@ -0,0 +1 @@ ++b diff --git a/test/ses/strdiff/diff_test8 b/test/ses/strdiff/diff_test8 new file mode 100644 index 0000000..e4ce6bb --- /dev/null +++ b/test/ses/strdiff/diff_test8 @@ -0,0 +1,21 @@ + a + c ++e + b + d +-e + a ++b ++b + q + q + q + q + q + q + q ++a +-c + b + e + d