Done except report

2023-12-26 13:55:44 +01:00 · 2023-12-26 13:55:44 +01:00 · f5f605c259
parent 1153d5138a
commit f5f605c259
6 changed files with 70 additions and 32 deletions
--- a/README.md
+++ b/README.md
@ -13,7 +13,7 @@ Note: Feel free to modify this file according to the project's necessities.

 ## Environment setup

-To install the required dependencies the Python version manager `pyenv` must be installed and in `$PATH`. 
+To install the required dependencies the Python version manager `pyenv` must be installed and in `$PATH`.

 To set up a Python 3.11 virtualenv to execute parts 1, 2, and 3 of the project run:

@ -24,7 +24,7 @@ pyenv shell 3.11
 python3.11 -m venv env

 source env/bin/activate
-pip3.11 install -r requirements.txt
+pip3.11 install -r requirements_3.11.txt
 ```

 To set up Python 3.7 (last version supported by `mut.py`) to execute part 4 of the project run:
@ -34,18 +34,16 @@ deactivate || true  # deactivate existing environment
 pyenv install -s 3.7
 pyenv shell 3.7
 python3.7 -m venv env37
-source env37/bin/activate

-pip3.7 install MutPy==0.6.1
-pip3.7 install -r requirements.txt
+source env37/bin/activate
+pip3.7 install -r requirements_3.7.txt
 ```

 ## Instrumentation (Part 1)

-To generate the instrumented code for all the files in the benchmark run the command:
+To generate the instrumented code for all the files in the benchmark run the commands:

 ```shell
-# Reset Python to latest (system) version
 deactivate || true
 pyenv shell 3.11
 source env/bin/activate
@ -56,12 +54,36 @@ python3.11 ./instrument.py
 The generated files are created in the directory `instrumented`. Each file name matches the file name of the
 corresponding source file in `benchmark`.

-## Test case generation (Part 2 and Part 3)
+## Test case generation using the fuzzer (Part 2)

-To generate test cases for all files in the benchmark run the command:
+To generate test cases for all files in the benchmark using the fuzzer run the commands:
+
+```shell
+deactivate || true
+pyenv shell 3.11
+source env/bin/activate
+
+python3.11 ./fuzzer.py
+```
+
+The test suite is created in the directory `fuzzer_tests`. One test file is generated for each file present in the
+`benchmark` directory. Run the command with the `-h` option for more details on partial generation.
+
+The test suite can then be executed over the benchmark code with the commands:
+
+```shell
+deactivate || true
+pyenv shell 3.11
+source env/bin/activate
+
+python3.11 -m unittest discover fuzzer_tests
+```
+
+## Test case generation using the genetic algorithm (Part 3)
+
+To generate test cases for all files in the benchmark using the genetic algorithm run the commands:

 ```shell
-# Reset Python to latest (system) version
 deactivate || true
 pyenv shell 3.11
 source env/bin/activate
@ -69,13 +91,12 @@ source env/bin/activate
 python3.11 ./genetic.py
 ```

-The test suite is created in the directory `tests`. One test file is generated for each file present in the 
+The test suite is created in the directory `tests`. One test file is generated for each file present in the
 `benchmark` directory. Run the command with the `-h` options for more details on partial generation.

-The test suite can be then executed over the benchmark code with the command:
+The test suite can then be executed over the benchmark code with the commands:

 ```shell
-# Reset Python to latest (system) version
 deactivate || true
 pyenv shell 3.11
 source env/bin/activate
@ -94,4 +115,15 @@ pyenv shell 3.7
 source env37/bin/activate

 python3.7 muttest.py
-```
+```
+
+The script will consider the tests in `fuzzer_tests` and `tests` and run mutation testing on them, collecting the
+mutation score for each run in `out/mutation_results_fuzzer.csv` and `out/mutation_results_genetic.csv` respectively.
+If either or both files exist, the mutation run for the matching test suite will be skipped and the saved values will be
+used.
+
+The script additionally generates two plots for the distribution and average of mutation scores per kind of generation
+and benchmark file. These two plots are saved in `out/mutation_scores.png` and `out/mutation_scores_mean.png`
+respectively. `out/stats.csv` is also generated and will contain a statistical comparison between the mutation score
+distribution for the fuzzer-generated and genetic-generated tests of each benchmark file, including the average score for
+both generations, the Wilcoxon paired test p-value, the Cohen's d effect size and its interpretation.
--- a/archive.py
+++ b/archive.py
@ -9,8 +9,6 @@ import operators
 class Archive:
    true_branches: Dict[int, any]
    false_branches: Dict[int, any]
-    false_score: Dict[int, any]
-    true_score: Dict[int, any]
    f_name: str

    def __init__(self, f_name: str) -> None:
@ -20,8 +18,6 @@ class Archive:
    def reset(self):
        self.true_branches = {}
        self.false_branches = {}
-        self.true_score = {}
-        self.false_score = {}

    def branches_covered(self) -> int:
        return len(self.true_branches.keys()) + len(self.false_branches.keys())
@ -36,7 +32,7 @@ class Archive:

    def suite_str(self):
        suite = self.build_suite()
-        return " ".join([",".join([f'{k}={repr(v)}' for k, v in test.items()]) for test in suite])
+        return " ".join([",".join([f'{k}={repr(v)}' for k, v in test.items()]) + f",score={self}" for test in suite])

    def consider_test(self, test_case: frozendict):
        branch = self.satisfies_unseen_branches(test_case)
--- a/muttest.py
+++ b/muttest.py
@ -78,7 +78,7 @@ def compute_stats(df_gen: pd.DataFrame, df_fuz: pd.DataFrame, output_file: str,
        df_avg.loc[f, 'interpretation'] = effect_size(df_avg.loc[f, 'cohen-d'])
        df_avg.loc[f, 'wilcoxon'] = wilcoxon(list_gen, list_fuz, zero_method='zsplit').pvalue

-    df_avg.to_csv(stat_csv)
+    df_avg.round(4).to_csv(stat_csv)


 def run_mutpy(test_path: str, source_path: str) -> float:
--- a/out/stats.csv
+++ b/out/stats.csv
@ -1,11 +1,11 @@
 file,fuzzer,genetic,cohen-d,interpretation,wilcoxon
-anagram_check,23.16,18.509999999999998,-0.569029291867328,Very small,0.05263321233144818
-caesar_cipher,60.17999999999999,62.39,0.4672462236206022,Medium,0.35895143585262634
-check_armstrong,89.53999999999999,89.17999999999999,-0.14272435323355917,Very small,0.625
-common_divisor_count,71.21000000000001,72.26,0.25955481074139225,Medium,0.556640625
-exponentiation,68.99,68.47999999999999,-0.09904987594430334,Very small,0.76953125
-gcd,50.8,44.67999999999999,-1.0306023047883075,Very small,0.06654572134371614
-longest_substring,83.96000000000001,83.01,-0.15469347200289738,Very small,0.845703125
-rabin_karp,66.15,64.51,-0.45973880268318706,Very small,0.3080632299071987
-railfence_cipher,90.28,89.62,-0.3514153148238166,Very small,0.375
-zellers_birthday,69.1,67.86,-0.5598449297371694,Very small,0.18514372415787317
+anagram_check,23.16,18.51,-0.569,Very small,0.0526
+caesar_cipher,60.18,62.39,0.4672,Medium,0.359
+check_armstrong,89.54,89.18,-0.1427,Very small,0.625
+common_divisor_count,71.21,72.26,0.2596,Medium,0.5566
+exponentiation,68.99,68.48,-0.099,Very small,0.7695
+gcd,50.8,44.68,-1.0306,Very small,0.0665
+longest_substring,83.96,83.01,-0.1547,Very small,0.8457
+rabin_karp,66.15,64.51,-0.4597,Very small,0.3081
+railfence_cipher,90.28,89.62,-0.3514,Very small,0.375
+zellers_birthday,69.1,67.86,-0.5598,Very small,0.1851
--- a/requirements_3.11.txt
+++ b/requirements_3.11.txt
@ -4,6 +4,6 @@ astunparse==1.6.3
 frozendict==2.3.8
 tqdm==4.66.1
 pandas==1.3.5
-matplotlib!=3.6.1,>=3.1
+matplotlib==3.8.2
 seaborn==0.12.2
-scipy==1.7.3
+scipy==1.11.4
--- a/requirements_3.7.txt
+++ b/requirements_3.7.txt
@ -0,0 +1,10 @@
+nltk==3.8.1
+deap==1.4.1
+astunparse==1.6.3
+frozendict==2.3.8
+tqdm==4.66.1
+pandas==1.3.5
+matplotlib==3.5.3
+seaborn==0.12.2
+scipy==1.7.3
+MutPy==0.6.1