In [5]:
import json
import glob
import os
import pandas as pd
import numpy as np

In [30]:
def read_series_from_plain_json(sample, json_filename):
    """Load a flat JSON object of per-experiment values as a pandas Series.

    Parameters
    ----------
    sample : str
        Sample name; it is prepended to every experiment key to build the
        index labels.
    json_filename : str
        Path to a JSON file whose top level is an object mapping experiment
        names to values.

    Returns
    -------
    pandas.Series
        One entry per key of the JSON object, labelled
        ``"<sample>-<experiment>"``.
    """
    # Read explicitly as UTF-8 so that non-ASCII experiment names do not
    # depend on the platform's default text encoding.
    with open(json_filename, "r", encoding="utf-8") as f:
        json_data = json.load(f)

    indices = []
    values = []

    for experiment, value in json_data.items():
        # Named `label` rather than `id` to avoid shadowing the built-in.
        label = sample + "-" + experiment
        indices.append(label)
        values.append(value)

    return pd.Series(data=values, index=indices)

In [39]:
def read_count_from_misaxx_json(json_filename):
    """Return the conidia count stored in a misaxx attachment JSON file.

    Parameters
    ----------
    json_filename : str
        Path to the attachment JSON file.

    Returns
    -------
    The value found under
    ``["misaxx-segment-cells:attachments/conidia-count"]["count"]``.

    Raises
    ------
    KeyError
        If either of the two keys is missing from the file.
    """
    with open(json_filename, "r") as attachment_file:
        attachment = json.load(attachment_file)
    conidia_entry = attachment["misaxx-segment-cells:attachments/conidia-count"]
    return conidia_entry["count"]

def read_series_from_misaxx_attachments(sample, attachment_dir):
    """Collect the conidia counts of all attachment files of one sample.

    Parameters
    ----------
    sample : str
        Sample name; it is prepended to every experiment name to build the
        index labels.
    attachment_dir : str
        Directory whose ``*.json`` files are read with
        ``read_count_from_misaxx_json``.

    Returns
    -------
    pandas.Series
        One count per JSON file, labelled ``"<sample>-<experiment>"``, in the
        order ``glob.glob`` returns the files.
    """
    attachment_paths = glob.glob(attachment_dir + "/*.json")

    def experiment_name(path):
        # Drop the 5-character ".json" suffix, then 4 more trailing characters
        # (presumably a nested extension such as ".tif" -- TODO confirm).
        return os.path.basename(path)[:-5][:-4]

    labels = [sample + "-" + experiment_name(path) for path in attachment_paths]
    counts = [read_count_from_misaxx_json(path) for path in attachment_paths]

    return pd.Series(data=counts, index=labels)
        

In [42]:
def _collect_run_columns(run_pattern, sample_pattern, read_sample_series):
    """Build one count Series per run directory matching ``run_pattern``.

    Parameters
    ----------
    run_pattern : str
        Glob pattern selecting the run directories.
    sample_pattern : str
        Glob pattern, appended to each run directory path, selecting that
        run's sample directories.
    read_sample_series : callable
        Called as ``read_sample_series(sample, sample_path)``; must return a
        Series of counts for that sample.

    Returns
    -------
    list of pandas.Series
        One Series per run, named after the run directory.
    """
    run_columns = []
    for run_path in glob.glob(run_pattern):
        if not os.path.isdir(run_path):
            continue
        sample_series = [
            read_sample_series(os.path.basename(sample_path), sample_path)
            for sample_path in glob.glob(run_path + sample_pattern)
            if os.path.isdir(sample_path)
        ]
        # pd.concat replaces Series.append, which was removed in pandas 2.0.
        # A run without any sample still contributes an (all-NaN) column.
        if sample_series:
            counts = pd.concat(sample_series)
        else:
            counts = pd.Series(dtype="float64")
        run_columns.append(counts.rename(os.path.basename(run_path)))
    return run_columns


# Column order: python runs first, then java runs, then cxx runs.
run_columns = (
    _collect_run_columns(
        "./cells_python*",
        "/output/*",
        lambda sample, path: read_series_from_plain_json(sample, path + "/results.json"),
    )
    + _collect_run_columns(
        "./cells_java*",
        "/output/*",
        lambda sample, path: read_series_from_plain_json(sample, path + "/conidia.json"),
    )
    + _collect_run_columns(
        "./cells_cxx*",
        "/output-docker/attachments/exported/*",
        read_series_from_misaxx_attachments,
    )
)

# One outer join over all runs: rows are the union of all
# "<sample>-<experiment>" labels, columns are the runs. sort=True is applied
# uniformly so the row order does not depend on which run was read first.
if run_columns:
    df = pd.concat(run_columns, axis=1, join="outer", sort=True)
else:
    df = pd.DataFrame()
    

In [43]:
# Show the combined counts table: one column per run, one row per
# "<sample>-<experiment>" label.
df

Unnamed: 0,cells_python_snakemake_noMT_opMT,cells_python_snakemake_dag30_opMT,cells_python_snakemake_noMT_noopMT,cells_python_snakemake_dag30_noopMT,cells_java_imglib2_dag30_noopMT,cells_java_imglib2_noMT_noopMT,cells_cxx_misaxx_noMT_opMT,cells_cxx_misaxx_noMT_noopMT,cells_cxx_misaxx_dag30_opMT,cells_cxx_misaxx_dag30_noopMT
ATTC-2µl_3rdReplicate-Experiment-5519,707,707,707,707,708,708,709,709,709,709
ATTC-2µl_3rdReplicate-Experiment-5520,378,378,378,378,379,379,375,375,375,375
ATTC-2µl_3rdReplicate-Experiment-5521,723,723,723,723,727,727,720,720,720,720
ATTC-2µl_3rdReplicate-Experiment-5522,806,806,806,806,814,814,798,798,798,798
ATTC-2µl_3rdReplicate-Experiment-5523,531,531,531,531,539,539,526,526,526,526
...,...,...,...,...,...,...,...,...,...,...
ATTC_IµL_3rdReplicate-Experiment-5514,443,443,443,443,460,460,445,445,445,445
ATTC_IµL_3rdReplicate-Experiment-5515,406,406,406,406,403,403,404,404,404,404
ATTC_IµL_3rdReplicate-Experiment-5516,449,449,449,449,449,449,448,448,448,448
ATTC_IµL_3rdReplicate-Experiment-5517,614,614,614,614,619,619,637,637,637,637


In [44]:
# exist_ok=True keeps this cell re-runnable: without it a second run raises
# FileExistsError because the directory is already there.
os.makedirs("analysis/cells", exist_ok=True)

In [45]:
# Persist the counts table (row labels included); the analysis/cells
# directory must already exist.
df.to_csv("analysis/cells/counts.csv")

# Runtimes

In [54]:
def read_python_runtime(directory):
    """Read a run's runtime from ``<directory>/runtime.log``.

    The file must contain a single number; it is divided by 60 before being
    returned (presumably seconds to minutes -- confirm against the tool that
    writes the log).
    """
    log_path = directory + "/runtime.log"
    with open(log_path, "r") as log_file:
        logged_value = float(log_file.read())
    return logged_value / 60
    
def read_java_runtime(directory):
    """Read a run's runtime from ``<directory>/output/runtime.log``.

    The file must contain a single number; it is divided by 1000 and then by
    60 before being returned (presumably milliseconds to minutes -- confirm
    against the tool that writes the log).
    """
    log_path = directory + "/output/runtime.log"
    with open(log_path, "r") as log_file:
        logged_value = float(log_file.read())
    return logged_value / 1000 / 60
    
def read_misaxx_runtime(directory):
    """Read a run's runtime from ``<directory>/output-docker/runtime-log.json``.

    The largest ``"end-time"`` among the entries listed under
    ``["entries"]["thread0"]`` is divided by 1000 and then by 60 (presumably
    milliseconds to minutes -- confirm against the log format).

    Raises
    ------
    KeyError
        If ``"entries"``, ``"thread0"`` or an ``"end-time"`` is missing.
    ValueError
        If the ``"thread0"`` list is empty.
    """
    log_path = directory + "/output-docker/runtime-log.json"
    with open(log_path, "r") as log_file:
        runtime_log = json.load(log_file)
    # Only entries recorded under "thread0" are considered.
    # NOTE(review): other threads' entries are ignored -- confirm that thread0
    # always carries the latest end time.
    latest_end = np.max([entry["end-time"] for entry in runtime_log["entries"]["thread0"]])
    return latest_end / 1000 / 60

In [55]:
# Each implementation has its own run-directory pattern and runtime reader;
# the order here fixes the row order of the resulting table.
runtime_readers = [
    ("./cells_python*", read_python_runtime),
    ("./cells_java*", read_java_runtime),
    ("./cells_cxx*", read_misaxx_runtime),
]

runtime_runs = []
runtime_runtimes = []

for run_pattern, read_runtime in runtime_readers:
    # Anything matching the pattern that is not a directory is ignored.
    for run_path in filter(os.path.isdir, glob.glob(run_pattern)):
        run = os.path.basename(run_path)
        rt = read_runtime(run_path)
        runtime_runs.append(run)
        runtime_runtimes.append(rt)

# One row per run, labelled with the run directory name.
df_runtime = pd.DataFrame({"Runtime (min)": runtime_runtimes}, index=runtime_runs)

In [56]:
# Show the runtime table: one row per run, single column "Runtime (min)".
df_runtime

Unnamed: 0,Runtime (min)
cells_python_snakemake_noMT_opMT,7.05
cells_python_snakemake_dag30_opMT,0.416667
cells_python_snakemake_noMT_noopMT,6.583333
cells_python_snakemake_dag30_noopMT,0.4
cells_java_imglib2_dag30_noopMT,12.34315
cells_java_imglib2_noMT_noopMT,51.847867
cells_cxx_misaxx_noMT_opMT,1.151505
cells_cxx_misaxx_noMT_noopMT,1.403558
cells_cxx_misaxx_dag30_opMT,0.085341
cells_cxx_misaxx_dag30_noopMT,0.080374


In [57]:
# Persist the runtime table (run names included as the first CSV column).
df_runtime.to_csv("analysis/cells/runtimes.csv")

# Intra-implementation equality

In [60]:
# Reload the counts table from the CSV written earlier in this notebook;
# index_col=0 restores the first CSV column as the row index.
df = pd.read_csv("analysis/cells/counts.csv", index_col=0)
df

Unnamed: 0,cells_python_snakemake_noMT_opMT,cells_python_snakemake_dag30_opMT,cells_python_snakemake_noMT_noopMT,cells_python_snakemake_dag30_noopMT,cells_java_imglib2_dag30_noopMT,cells_java_imglib2_noMT_noopMT,cells_cxx_misaxx_noMT_opMT,cells_cxx_misaxx_noMT_noopMT,cells_cxx_misaxx_dag30_opMT,cells_cxx_misaxx_dag30_noopMT
ATTC-2µl_3rdReplicate-Experiment-5519,707,707,707,707,708,708,709,709,709,709
ATTC-2µl_3rdReplicate-Experiment-5520,378,378,378,378,379,379,375,375,375,375
ATTC-2µl_3rdReplicate-Experiment-5521,723,723,723,723,727,727,720,720,720,720
ATTC-2µl_3rdReplicate-Experiment-5522,806,806,806,806,814,814,798,798,798,798
ATTC-2µl_3rdReplicate-Experiment-5523,531,531,531,531,539,539,526,526,526,526
...,...,...,...,...,...,...,...,...,...,...
ATTC_IµL_3rdReplicate-Experiment-5514,443,443,443,443,460,460,445,445,445,445
ATTC_IµL_3rdReplicate-Experiment-5515,406,406,406,406,403,403,404,404,404,404
ATTC_IµL_3rdReplicate-Experiment-5516,449,449,449,449,449,449,448,448,448,448
ATTC_IµL_3rdReplicate-Experiment-5517,614,614,614,614,619,619,637,637,637,637


In [69]:
# Every run of the same implementation must report identical counts for every
# sample/experiment row, regardless of its threading configuration.
for implementation in ["cxx", "java", "python"]:
    # Run columns are matched by substring of the column name.
    columns = [x for x in df.columns if implementation in x]
    assert columns, "no run columns found for implementation {!r}".format(implementation)

    idf = df.filter(items=columns)
    # Number of distinct values per row; dropna=False makes a missing value
    # count as its own value, so a run lacking a row is reported as well.
    distinct_per_row = idf.nunique(axis=1, dropna=False)
    disagreeing = distinct_per_row.index[distinct_per_row != 1].tolist()
    assert not disagreeing, (
        "{} runs disagree on {} row(s), e.g. {}".format(
            implementation, len(disagreeing), disagreeing[:5]
        )
    )

In [72]:
from datetime import datetime

In [73]:
# Wall-clock timestamp marking the start of a manually timed interval.
start = datetime.now()


In [74]:
# Wall-clock timestamp marking the end of the manually timed interval.
end = datetime.now()

In [76]:
# Elapsed time between the two timestamps, as a datetime.timedelta.
td = end - start

In [78]:
# Elapsed time in seconds, as a float.
td.total_seconds()

5.552313