# kse-02/muttest.py
#
# 149 lines
# 4.9 KiB
# Python

import math
import os
import re
import subprocess
import sys
from math import sqrt
from statistics import mean, variance
from typing import List, Dict, Callable, Set
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import wilcoxon
from tqdm import tqdm
import genetic
from fuzzer import generate_tests, fuzzer_generate
from instrument import Params
# Every path below is anchored at the directory that contains this script.
ROOT_DIR = os.path.dirname(__file__)

# In order: sources that get mutated, test directory used for the genetic
# generator, test directory used for the fuzzer, and the directory that
# receives the result CSVs and plots.
IN_SOURCE_DIR, IN_TEST_DIR, IN_FUZZER_TEST_DIR, OUT_DIR = (
    os.path.join(ROOT_DIR, subdir)
    for subdir in ("benchmark", "tests", "fuzzer_tests", "out")
)

# mut.py entry script inside the bundled 'env37' environment; it is launched
# through the current interpreter by run_mutpy.
MUT_PY_PATH = os.path.join(ROOT_DIR, 'env37', 'bin', 'mut.py')

# NOTE(review): not referenced in the visible part of this file; presumably the
# number of repetitions per benchmark (it matches the seed count) -- confirm.
REPS: int = 10
def cohen_d(d1: List[float], d2: List[float]) -> float:
    """Return Cohen's d effect size between two samples.

    The difference of the sample means (``d1`` minus ``d2``) is divided by
    the pooled standard deviation of both samples.

    Args:
        d1: First sample; needs at least two values.
        d2: Second sample; needs at least two values.

    Returns:
        The signed effect size. When the pooled standard deviation is zero
        (both samples are constant) the result is ``0.0`` if the means are
        equal, otherwise an infinity carrying the sign of the mean difference.

    Raises:
        statistics.StatisticsError: if either sample has fewer than two values.
    """
    n1, n2 = len(d1), len(d2)
    pooled_sd = sqrt(((n1 - 1) * variance(d1) + (n2 - 1) * variance(d2)) /
                     (n1 + n2 - 2))
    mean_diff = mean(d1) - mean(d2)
    if pooled_sd == 0:
        # Two constant samples: identical constants mean "no effect", not an
        # infinite one; differing constants keep the direction of the difference.
        if mean_diff == 0:
            return 0.0
        return math.copysign(math.inf, mean_diff)
    return mean_diff / pooled_sd
def effect_size(eff: float) -> str:
    """Translate an effect size into a verbal magnitude label.

    Only the absolute value of ``eff`` matters. Each label covers values up to
    and including its upper bound; anything above the last bound (or anything
    that compares false against every bound, such as NaN) is labelled 'Huge'.
    """
    magnitude = abs(eff)
    # Ordered (inclusive upper bound, label) pairs; the first match wins.
    labelled_bounds = (
        (0.01, 'Very small'),
        (0.2, 'Small'),
        (0.5, 'Medium'),
        (0.8, 'Large'),
        (1.2, 'Very large'),
    )
    for upper_bound, label in labelled_bounds:
        if magnitude <= upper_bound:
            return label
    return 'Huge'
def compute_stats(df_gen: pd.DataFrame, df_fuz: pd.DataFrame, output_file: str, avg_output_file: str, stat_csv: str):
    """Plot and statistically compare genetic vs. fuzzer mutation scores.

    Args:
        df_gen: Scores of the genetic suite; read through its 'file' and
            'score' columns.
        df_fuz: Scores of the fuzzer suite, same columns.
        output_file: Path of the box-plot image (score distribution per file).
        avg_output_file: Path of the bar-plot image (mean score per file).
        stat_csv: Path of the CSV holding, per file, the mean score of each
            source plus Cohen's d, its verbal interpretation and the Wilcoxon
            p-value.
    """
    # Stack both frames; the concat keys become the first index level, which
    # reset_index() turns into the first column (renamed to 'source' below).
    combined_df = pd.concat([df_gen, df_fuz], keys=["genetic", "fuzzer"]).reset_index()
    combined_df.columns = ['source', *combined_df.columns[1:]]
    # The second column is the former per-frame row index; it is not needed.
    del combined_df[combined_df.columns[1]]
    combined_df = combined_df.sort_values(['source', 'file'])

    # Distribution of the scores per file and source.
    plt.figure(figsize=(10, 6))
    sns.set(style="whitegrid")
    sns.boxplot(data=combined_df, x="file", y="score", hue="source")
    plt.yticks(range(0, 101, 10))
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(output_file)
    plt.close()  # release the figure once it is on disk

    # Mean score per file and source.
    plt.figure(figsize=(10, 6))
    df_avg = combined_df.groupby(['file', 'source']).mean().reset_index().sort_values(['source', 'file'])
    sns.set(style="whitegrid")
    sns.barplot(data=df_avg, x="file", y="score", hue="source")
    plt.yticks(range(0, 101, 10))
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(avg_output_file)
    plt.close()

    # One row per file, one column per source, then the statistics columns.
    df_avg = df_avg.pivot(index='file', columns='source', values='score').rename_axis(None, axis=1)
    df_avg['cohen-d'] = math.nan
    # Created with object dtype because it receives strings; assigning strings
    # into a float column relies on silent upcasting, which pandas deprecates.
    df_avg['interpretation'] = pd.Series(index=df_avg.index, dtype=object)
    df_avg['wilcoxon'] = math.nan

    for f in combined_df['file'].drop_duplicates():
        list_gen = df_gen.loc[(df_gen.file == f), 'score'].tolist()
        list_fuz = df_fuz.loc[(df_fuz.file == f), 'score'].tolist()
        d = cohen_d(list_gen, list_fuz)
        df_avg.loc[f, 'cohen-d'] = d
        df_avg.loc[f, 'interpretation'] = effect_size(d)
        # The two score lists are passed to the test as given, paired by position.
        df_avg.loc[f, 'wilcoxon'] = wilcoxon(list_gen, list_fuz, zero_method='zsplit').pvalue

    df_avg.round(4).to_csv(stat_csv)
def run_mutpy(test_path: str, source_path: str) -> float:
    """Run mut.py on one source/test pair and return the mutation score.

    Args:
        test_path: Test module, passed to mut.py with ``-u``.
        source_path: Module to mutate, passed to mut.py with ``-t``.

    Returns:
        The percentage printed on mut.py's "Mutation score [...]: N.N%" line.

    Raises:
        subprocess.CalledProcessError: if mut.py exits with a non-zero status.
        ValueError: if the output contains no mutation-score line.
    """
    output = subprocess.check_output(
        [sys.executable,
         MUT_PY_PATH,
         '-t', source_path,
         '-u', test_path]).decode('utf-8')

    match = re.search(r'Mutation score \[.*]: (\d+\.\d+)%', output)
    if match is None:
        # Fail with context instead of an AttributeError on None.
        raise ValueError(
            f"no mutation score in mut.py output for {source_path!r} "
            f"(tests: {test_path!r}); output ended with: {output[-500:]!r}")
    return float(match.group(1))
def mutate_suite(out_file: str, in_test_dir: str, to_test: List[str], seeds: List[int],
                 generation_fn: Callable[[str], Set[Params]]) -> pd.DataFrame:
    """Collect mutation scores for every benchmark file across all seeds.

    If ``out_file`` already exists it is loaded and returned as-is, so an
    earlier run is never repeated. Otherwise, for each seed, ``generate_tests``
    is invoked for ``in_test_dir`` (presumably regenerating the test files
    there -- confirm against ``fuzzer.generate_tests``) and mut.py is run once
    per benchmark file.

    Args:
        out_file: CSV path used both as cache and as output.
        in_test_dir: Directory holding the ``test_<name>.py`` files.
        to_test: Benchmark module names, without the ``.py`` extension.
        seeds: Seeds handed to ``generate_tests``, one generation per seed.
        generation_fn: Generator forwarded to ``generate_tests``.

    Returns:
        A frame with one row per (seed, file) pair and the columns 'file'
        and 'score'.
    """
    if os.path.isfile(out_file):  # do not re-generate if file exists
        return pd.read_csv(out_file, index_col=0)

    # Make sure the result can be written before doing any of the slow work.
    out_dir = os.path.dirname(out_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    scores: List[Dict[str, object]] = []
    for seed in tqdm(seeds, desc="generating with seeds"):
        generate_tests([], seed, generation_fn, in_test_dir)

        for filename in tqdm(to_test, desc=f"mut.py [{os.path.basename(out_file)}]"):
            source_path = os.path.join(IN_SOURCE_DIR, f"{filename}.py")
            test_path = os.path.join(in_test_dir, f"test_{filename}.py")
            scores.append({
                'file': filename,
                'score': run_mutpy(test_path, source_path)
            })

    df = pd.DataFrame.from_records(scores)
    df.to_csv(out_file)
    return df
def main():
    """Run the whole experiment: score both suites, then plot and compare.

    Every ``.py`` file found in IN_SOURCE_DIR is a benchmark. Mutation scores
    are collected (or loaded from an existing CSV) for the genetic and the
    fuzzer generator using the same fixed seeds, and the comparison plots and
    statistics are written to OUT_DIR.
    """
    to_test = []
    for entry in os.listdir(IN_SOURCE_DIR):
        stem, extension = os.path.splitext(entry)
        if extension == ".py":
            to_test.append(stem)

    # Fixed seeds, shared by both generators.
    seeds = [182, 81, 95, 16, 124, 166, 178, 22, 20, 54]
    genetic.init_deap()

    genetic_csv = os.path.join(OUT_DIR, 'mutation_results_genetic.csv')
    fuzzer_csv = os.path.join(OUT_DIR, 'mutation_results_fuzzer.csv')
    df_gen = mutate_suite(genetic_csv, IN_TEST_DIR, to_test, seeds, genetic.generate)
    df_fuz = mutate_suite(fuzzer_csv, IN_FUZZER_TEST_DIR, to_test, seeds, fuzzer_generate)

    box_plot_path = os.path.join(OUT_DIR, "mutation_scores.png")
    mean_plot_path = os.path.join(OUT_DIR, "mutation_scores_mean.png")
    stats_path = os.path.join(OUT_DIR, "stats.csv")
    compute_stats(df_gen, df_fuz, box_plot_path, mean_plot_path, stats_path)
# Run the experiment only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()