#!/nfsmnt/malaria0/ssi/visintainer/local/bin/python

from numpy import *
from optparse import OptionParser
from mlpy import *

  
# Command line parsing
#
# Defines the options, parses sys.argv and validates the result.  Any
# validation failure goes through parser.error(), which prints the usage
# message and terminates the script.
parser = OptionParser()

# Input data and optional preprocessing
parser.add_option("-d", "--data", metavar = "FILE", action = "store", type = "string",
                  dest = "data", help = "data - required")
parser.add_option("-s", "--standardize", action = "store_true", default = False,
                  dest = "stand", help = "standardize data")
parser.add_option("-n", "--normalize", action = "store_true", default = False,
                  dest = "norm", help = "normalize data")

# Resampling scheme: exactly one of -k / -c must be given (checked below).
# -c takes two integers, so options.c is a (SETS, PAIRS) tuple when present.
parser.add_option("-k", action = "store", type = "int",
                  dest = "k", help = "k for k-fold cross validation")
parser.add_option("-c", action = "store", type = "int", nargs = 2, metavar = "SETS PAIRS",
                  dest = "c", help = "sets and pairs for monte carlo cross validation")
parser.add_option("-S", "--stratified", action = "store_true", default = False,
                  dest = "strat", help = "for stratified cv")

# Classifier settings
parser.add_option("-K", "--kernel", action = "store", type = "string",
                  dest = "kernel", help = "kernel: 'linear', 'gaussian', 'polynomial', 'tr' [default %default]", default = 'linear')
parser.add_option("-P", "--kparameter", action = "store", type = "float",
                  dest = "kparameter", help = "kernel parameter (two sigma squared) for gaussian and polynomial kernels [default %default]", default = 0.1)
parser.add_option("-o", "--cost", action = "store", type = "float",
                  dest = "cost", help = "for cost-sensitive classification [-1.0, 1.0] [default %default]", default = 0.0)

# Grid of regularization parameter values to sweep
parser.add_option("-m", "--min", action = "store", type = "float",
                  dest = "min", help = "min value for regularization parameter [default %default]", default = -5)
parser.add_option("-M", "--max", action = "store", type = "float",
                  dest = "max", help = "max value for regularization parameter [default %default]", default = 5)
parser.add_option("-p", "--steps", action = "store", type = "int",
                  dest = "steps", help = "steps for regularization parameter [default %default]", default = 11)
parser.add_option("-e", "--scale", action = "store", type = "string",
                  dest = "scale",  help = "scale for regularization parameter: 'lin' or 'log' [default %default]", default = "log")

# Optional extra outputs
parser.add_option("-l", "--lists", action = "store_true", default = False,
                  dest = "lists", help = "Canberra distance indicator")
parser.add_option("-a", "--auc", action = "store_true", default = False,
                  dest = "auc", help = "Wmw_auc metric computation")

(options, args) = parser.parse_args()

# Validation.  Each message names the flag the user actually has to fix:
# the kernel is -K (not -l, which is --lists) and the cost is -o (not -c,
# which is the monte carlo option).
if not options.data:
    parser.error("option -d (data) is required")
if options.kernel not in ['linear', 'gaussian', 'polynomial', 'tr']:
    parser.error("bad option -K (kernel)")
if options.cost > 1.0 or options.cost < -1.0:
    parser.error("bad option -o (cost)")
if not (options.k or options.c):
    parser.error("option -k (k-fold) or -c (monte carlo) for resampling is required")
if (options.k and options.c):
    parser.error("option -k (k-fold) and -c (monte carlo) are mutually exclusive")
if options.scale not in ["lin", "log"]:
    parser.error("option -e (scale) should be 'lin' or 'log'")

# C values
# Candidate regularization parameters: options.steps points between
# options.min and options.max.  On the 'log' scale the two bounds are passed
# to logspace, i.e. they act as exponents rather than as the values themselves.
if options.scale == 'log':
    C = logspace(options.min, options.max, options.steps)
elif options.scale == 'lin':
    C = linspace(options.min, options.max, options.steps)

# Data
# Read the samples (x) and their labels (y) from the file given with -d.
x, y = data_fromfile(options.data)

# Optional preprocessing; when both flags are set, standardization is
# applied first and normalization second.
if options.stand:
    x = data_standardize(x)
if options.norm:
    x = data_normalize(x)

# Summary line: sample count, per-label counts (labels 1 and -1) and
# feature count.
n_samples, n_features = x.shape[0], x.shape[1]
n_pos = sum(y == 1)
n_neg = sum(y == -1)
print("Samples: %d (1: %d, -1: %d) - Features: %d" % (n_samples, n_pos, n_neg, n_features))

# Resampling
# Build `res`, the collection of train/test splits used for every value of C.
# The scheme is k-fold (-k) or monte carlo (-c); -S selects the stratified
# variant, which is given the labels instead of just the sample count.
if options.k:
    if options.strat:
        print("Stratified %d-Fold cv" % options.k)
        res = kfoldS(cl = y, sets = options.k)
    else:
        print("%d-Fold cv" % options.k)
        res = kfold(nsamples = y.shape[0], sets = options.k)
elif options.c:
    # options.c holds the two integers given to -c: (sets, pairs)
    n_sets, n_pairs = options.c
    if options.strat:
        print("Stratified Monte Carlo CV (%d sets, %d pairs)" % (n_sets, n_pairs))
        res = montecarloS(cl = y, sets = n_sets, pairs = n_pairs)
    else:
        print("Monte Carlo cv (%d sets, %d pairs)" % (n_sets, n_pairs))
        res = montecarlo(nsamples = y.shape[0], sets = n_sets, pairs = n_pairs)

# Blank line separating the header from the per-C results
print("")

# Feature-ranking bookkeeping, only needed with -l/--lists: `lp` has one row
# per resampling split and one column per feature, and is refilled for every
# value of C.
if options.lists:
    R = Ranking(method='onestep')
    lp = empty((len(res), x.shape[1]), dtype = int)


# Compute
# For each candidate C, train and test an SVM on every split and print the
# metrics averaged over the splits.
for c in C:

    classifier = Svm(kernel = options.kernel, kp = options.kparameter,
                     cost = options.cost, C = c)

    # Running totals over the splits; AUC is only tracked with -a/--auc
    ERR = 0.0
    MCC = 0.0
    if options.auc:
        AUC = 0.0

    for fold, split in enumerate(res):
        # split[0] indexes the training samples, split[1] the test samples
        train_idx, test_idx = split[0], split[1]
        x_train, y_train = x[train_idx], y[train_idx]
        x_test, y_test = x[test_idx], y[test_idx]

        classifier.compute(x_train, y_train)
        predicted = classifier.predict(x_test)

        if options.lists:
            # NOTE(review): presumably the first element returned by
            # R.compute is the feature ranking, so argsort() gives the
            # position of each feature - confirm against the mlpy version
            # in use.
            ranking = R.compute(x_train, y_train, classifier)[0]
            lp[fold] = ranking.argsort()

        ERR += err(y_test, predicted)
        MCC += mcc(y_test, predicted)
        if options.auc:
            AUC += wmw_auc(y_test, predicted)

    # Turn the totals into means over the splits
    n_splits = float(len(res))
    ERR = ERR / n_splits
    MCC = MCC / n_splits

    # Metrics that were not requested are reported as placeholders
    if not options.auc:
        AUC = nan
    else:
        AUC = AUC / n_splits

    if not options.lists:
        DIST = 'unknown'
    else:
        DIST = canberra(lp, x.shape[1])

    print("C %e: error %f, mcc %f, auc %f, dist %s" % (c, ERR, MCC, AUC, DIST))
