Done except report

This commit is contained in:
Claudio Maggioni 2023-12-26 13:55:44 +01:00
parent 1153d5138a
commit f5f605c259
6 changed files with 70 additions and 32 deletions

View File

@ -13,7 +13,7 @@ Note: Feel free to modify this file according to the project's necessities.
## Environment setup
To install the required dependencies the Python version manager `pyenv` must be installed and in `$PATH`.
To install the required dependencies the Python version manager `pyenv` must be installed and in `$PATH`.
To set up a Python 3.11 virtualenv to execute parts 1, 2, and 3 of the project run:
@ -24,7 +24,7 @@ pyenv shell 3.11
python3.11 -m venv env
source env/bin/activate
pip3.11 install -r requirements.txt
pip3.11 install -r requirements_3.11.txt
```
To set up Python 3.7 (last version supported by `mut.py`) to execute part 4 of the project run:
@ -34,18 +34,16 @@ deactivate || true # deactivate existing environment
pyenv install -s 3.7
pyenv shell 3.7
python3.7 -m venv env37
source env37/bin/activate
pip3.7 install MutPy==0.6.1
pip3.7 install -r requirements.txt
source env37/bin/activate
pip3.7 install -r requirements_3.7.txt
```
## Instrumentation (Part 1)
To generate the instrumented code for all the files in the benchmark run the command:
To generate the instrumented code for all the files in the benchmark run the commands:
```shell
# Reset Python to latest (system) version
deactivate || true
pyenv shell 3.11
source env/bin/activate
@ -56,12 +54,36 @@ python3.11 ./instrument.py
The generated files are created in the directory `instrumented`. Each file name matches the file name of the
corresponding source file in `benchmark`.
## Test case generation (Part 2 and Part 3)
## Test case generation using the fuzzer (Part 2)
To generate test cases for all files in the benchmark run the command:
To generate test cases for all files in the benchmark using the fuzzer run the commands:
```shell
deactivate || true
pyenv shell 3.11
source env/bin/activate
python3.11 ./fuzzer.py
```
The test suite is created in the directory `fuzzer_tests`. One test file is generated for each file present in the
`benchmark` directory. Run the command with the `-h` option for more details on partial generation.
The test suite can then be executed over the benchmark code with the commands:
```shell
deactivate || true
pyenv shell 3.11
source env/bin/activate
python3.11 -m unittest discover fuzzer_tests
```
## Test case generation using the genetic algorithm (Part 3)
To generate test cases for all files in the benchmark using the genetic algorithm run the commands:
```shell
# Reset Python to latest (system) version
deactivate || true
pyenv shell 3.11
source env/bin/activate
@ -69,13 +91,12 @@ source env/bin/activate
python3.11 ./genetic.py
```
The test suite is created in the directory `tests`. One test file is generated for each file present in the
The test suite is created in the directory `tests`. One test file is generated for each file present in the
`benchmark` directory. Run the command with the `-h` option for more details on partial generation.
The test suite can be then executed over the benchmark code with the command:
The test suite can then be executed over the benchmark code with the commands:
```shell
# Reset Python to latest (system) version
deactivate || true
pyenv shell 3.11
source env/bin/activate
@ -94,4 +115,15 @@ pyenv shell 3.7
source env37/bin/activate
python3.7 muttest.py
```
```
The script will consider the tests in `fuzzer_tests` and `tests` and run mutation testing on them, collecting the
mutation score for each run in `out/mutation_results_fuzzer.csv` and `out/mutation_results_genetic.csv` respectively.
If either or both files exist, the mutation run for the matching test suite will be skipped and the saved values will be
used.
The script additionally generates two plots for the distribution and average of mutation scores per kind of generation
and benchmark file. These two plots are saved in `out/mutation_scores.png` and `out/mutation_scores_mean.png`
respectively. `out/stats.csv` is also generated and will contain a statistical comparison between the mutation score
distributions for the fuzzer-generated and genetic-generated tests of each benchmark file, including the average score for
both generations, the Wilcoxon paired test p-value, the Cohen's d effect size and its interpretation.

View File

@ -9,8 +9,6 @@ import operators
class Archive:
true_branches: Dict[int, any]
false_branches: Dict[int, any]
false_score: Dict[int, any]
true_score: Dict[int, any]
f_name: str
def __init__(self, f_name: str) -> None:
@ -20,8 +18,6 @@ class Archive:
def reset(self):
self.true_branches = {}
self.false_branches = {}
self.true_score = {}
self.false_score = {}
def branches_covered(self) -> int:
return len(self.true_branches.keys()) + len(self.false_branches.keys())
@ -36,7 +32,7 @@ class Archive:
def suite_str(self):
suite = self.build_suite()
return " ".join([",".join([f'{k}={repr(v)}' for k, v in test.items()]) for test in suite])
return " ".join([",".join([f'{k}={repr(v)}' for k, v in test.items()]) + f",score={self}" for test in suite])
def consider_test(self, test_case: frozendict):
branch = self.satisfies_unseen_branches(test_case)

View File

@ -78,7 +78,7 @@ def compute_stats(df_gen: pd.DataFrame, df_fuz: pd.DataFrame, output_file: str,
df_avg.loc[f, 'interpretation'] = effect_size(df_avg.loc[f, 'cohen-d'])
df_avg.loc[f, 'wilcoxon'] = wilcoxon(list_gen, list_fuz, zero_method='zsplit').pvalue
df_avg.to_csv(stat_csv)
df_avg.round(4).to_csv(stat_csv)
def run_mutpy(test_path: str, source_path: str) -> float:

View File

@ -1,11 +1,11 @@
file,fuzzer,genetic,cohen-d,interpretation,wilcoxon
anagram_check,23.16,18.509999999999998,-0.569029291867328,Very small,0.05263321233144818
caesar_cipher,60.17999999999999,62.39,0.4672462236206022,Medium,0.35895143585262634
check_armstrong,89.53999999999999,89.17999999999999,-0.14272435323355917,Very small,0.625
common_divisor_count,71.21000000000001,72.26,0.25955481074139225,Medium,0.556640625
exponentiation,68.99,68.47999999999999,-0.09904987594430334,Very small,0.76953125
gcd,50.8,44.67999999999999,-1.0306023047883075,Very small,0.06654572134371614
longest_substring,83.96000000000001,83.01,-0.15469347200289738,Very small,0.845703125
rabin_karp,66.15,64.51,-0.45973880268318706,Very small,0.3080632299071987
railfence_cipher,90.28,89.62,-0.3514153148238166,Very small,0.375
zellers_birthday,69.1,67.86,-0.5598449297371694,Very small,0.18514372415787317
anagram_check,23.16,18.51,-0.569,Very small,0.0526
caesar_cipher,60.18,62.39,0.4672,Medium,0.359
check_armstrong,89.54,89.18,-0.1427,Very small,0.625
common_divisor_count,71.21,72.26,0.2596,Medium,0.5566
exponentiation,68.99,68.48,-0.099,Very small,0.7695
gcd,50.8,44.68,-1.0306,Very small,0.0665
longest_substring,83.96,83.01,-0.1547,Very small,0.8457
rabin_karp,66.15,64.51,-0.4597,Very small,0.3081
railfence_cipher,90.28,89.62,-0.3514,Very small,0.375
zellers_birthday,69.1,67.86,-0.5598,Very small,0.1851

1 file fuzzer genetic cohen-d interpretation wilcoxon
2 anagram_check 23.16 18.509999999999998 18.51 -0.569029291867328 -0.569 Very small 0.05263321233144818 0.0526
3 caesar_cipher 60.17999999999999 60.18 62.39 0.4672462236206022 0.4672 Medium 0.35895143585262634 0.359
4 check_armstrong 89.53999999999999 89.54 89.17999999999999 89.18 -0.14272435323355917 -0.1427 Very small 0.625
5 common_divisor_count 71.21000000000001 71.21 72.26 0.25955481074139225 0.2596 Medium 0.556640625 0.5566
6 exponentiation 68.99 68.47999999999999 68.48 -0.09904987594430334 -0.099 Very small 0.76953125 0.7695
7 gcd 50.8 44.67999999999999 44.68 -1.0306023047883075 -1.0306 Very small 0.06654572134371614 0.0665
8 longest_substring 83.96000000000001 83.96 83.01 -0.15469347200289738 -0.1547 Very small 0.845703125 0.8457
9 rabin_karp 66.15 64.51 -0.45973880268318706 -0.4597 Very small 0.3080632299071987 0.3081
10 railfence_cipher 90.28 89.62 -0.3514153148238166 -0.3514 Very small 0.375
11 zellers_birthday 69.1 67.86 -0.5598449297371694 -0.5598 Very small 0.18514372415787317 0.1851

View File

@ -4,6 +4,6 @@ astunparse==1.6.3
frozendict==2.3.8
tqdm==4.66.1
pandas==1.3.5
matplotlib!=3.6.1,>=3.1
matplotlib==3.8.2
seaborn==0.12.2
scipy==1.7.3
scipy==1.11.4

10
requirements_3.7.txt Normal file
View File

@ -0,0 +1,10 @@
nltk==3.8.1
deap==1.4.1
astunparse==1.6.3
frozendict==2.3.8
tqdm==4.66.1
pandas==1.3.5
matplotlib==3.5.3
seaborn==0.12.2
scipy==1.7.3
MutPy==0.6.1