# Takes a sample of binary variables and generates a
# directed graph where directed edges represent causal relationships
# between two variables. Undirected edges (with non-zero values for
# both G_ij and G_ji) represent ambiguous relationships. Each such edge
# implies two Markov-equivalent graphs.

import numpy as np
import pandas as pd
from graph_io import write_graph
from graph_functions import prune_graph
from graph_functions import assign_causality

# Configuration
# Threshold for inferring independence; handed to prune_graph below.
pthres = 0.01
# CSV sample to analyse. Its first line is read as a comma-separated
# header of node names (leading '#' and spaces are stripped).
input_file = 'smoking_sample_latent_nogenes.csv'
# Prefix shared by the names of all JSON files written by this script.
output_prefix = 'blog_21_smoking_latent'

# Read sample
# Only the first line is needed from the open handle: it carries the node
# names as a comma-separated list, optionally prefixed with '#' and spaces.
with open(input_file, "r") as file_in:
    header = file_in.readline()
header = header.rstrip('\n').lstrip('# ')
nodes = header.split(',')
N_nodes = len(nodes)

# The file is parsed a second time for the numeric rows; lines starting
# with '#' are skipped by loadtxt. Columns are labelled with the node names.
sample = pd.DataFrame(
    data=np.loadtxt(input_file, comments='#', delimiter=','),
    columns=nodes,
)

# Prune the graph
G = prune_graph(sample, pthres, verbose=True)

# Output all graphs as JSON: one file per key of G, with the key embedded
# in the file name.
for key in G.keys():
    output_file = '{0}_pruned_{1}.json'.format(output_prefix, key)
    write_graph(
        output_file,
        G[key],
        nodes,
        width=250,
        height=250,
    )

# Assign causality to edges, starting from the 'Final' pruned graph
Gdir = assign_causality(sample, G['Final'], nodes)

# Print the node names followed by the directed graph
print(nodes)
print(Gdir)

# Write the directed graph as JSON
output_file = '{0}_directed.json'.format(output_prefix)
write_graph(
    output_file,
    Gdir,
    nodes,
    width=250,
    height=250,
    is_directed=True,
)

    