#!/usr/local/bin/python3

# avenir-python: Machine Learning
# Author: Pranab Ghosh
# 
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You may
# obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0 
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.

# Package imports
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import sklearn as sk
import matplotlib
import random
import jprops
from io import StringIO
from sklearn.model_selection import cross_val_score
import joblib
from random import randint
from io import StringIO
from sklearn.linear_model import LinearRegression
sys.path.append(os.path.abspath("../lib"))
from util import *
from mlutil import *
from pasearch import *

class BaseRegressor(object):
	"""
	Base class for configuration driven regressors. Holds the configuration,
	the cached training data and the underlying regressor object, and provides
	the train / validate / predict workflow. Sub classes supply the concrete
	model by implementing buildModel().
	"""
	
	def __init__(self, configFile, defValues):
		"""
		initializer
		
		Parameters
			configFile : path of the configuration file handed to Configuration
			defValues : dictionary of default values supplied by the sub class; the
			common and training data entries below are added to it in place
		"""
		# each entry is a pair; presumably (default value, error message raised when
		# there is no usable default) - confirm against Configuration
		defValues["common.mode"] = ("train", None)
		defValues["common.model.directory"] = ("model", None)
		defValues["common.model.file"] = (None, None)
		defValues["common.scale.file.path"] = (None, "missing scale file path")
		defValues["common.preprocessing"] = (None, None)
		defValues["common.verbose"] = (False, None)
		defValues["train.data.file"] = (None, "missing training data file")
		defValues["train.data.fields"] = (None, "missing training data field ordinals")
		defValues["train.data.feature.fields"] = (None, "missing training data feature field ordinals")
		defValues["train.data.out.field"] = (None, "missing out field ordinal")	

		self.config = Configuration(configFile, defValues)
		self.featData = None
		self.outData = None
		self.regressor = None
		self.verbose = self.config.getBooleanConfig("common.verbose")[0]
		# mode is a string such as "train", so it has to be read as a string, 
		# not as a boolean
		self.mode = self.config.getStringConfig("common.mode")[0]
		# NOTE(review): no defaults are registered here for common.logging.*, 
		# train.model.save and predict.use.saved.model - confirm they are always
		# present in the configuration file
		logFilePath = self.config.getStringConfig("common.logging.file")[0]
		logLevName = self.config.getStringConfig("common.logging.level")[0]
		self.logger = createLogger(__name__, logFilePath, logLevName)
		self.logger.info("********* starting session")
	
	def initConfig(self, configFile, defValues):
		"""
		initialize config, replacing the current configuration object
		
		Parameters
			configFile : path of the configuration file
			defValues : dictionary of default values
		"""
		self.config = Configuration(configFile, defValues)
	
	def getConfig(self):
		"""
		get config object
		"""
		return self.config
	
	def setConfigParam(self, name, value):
		"""
		set config param
		
		Parameters
			name : config parameter name
			value : config parameter value
		"""
		self.config.setParam(name, value)

	def getMode(self):
		"""
		get mode
		"""
		return self.mode

	def getModelFilePath(self):
		"""
		get model file path built from the configured model directory and model
		file name; raises ValueError when no model file name is configured
		"""
		modelDirectory = self.config.getStringConfig("common.model.directory")[0]
		modelFile = self.config.getStringConfig("common.model.file")[0]
		if modelFile is None:
			raise ValueError("missing model file name")
		return os.path.join(modelDirectory, modelFile)

	def buildModel(self):
		"""
		builds model object and stores it in self.regressor; has to be 
		implemented by the sub class
		"""
		raise NotImplementedError("buildModel() has to be implemented by the sub class")

	def train(self):
		"""
		train model; returns the tuple (score on training data, intercept, coefficients)
		and saves the model when train.model.save is set
		"""
		#build model
		self.buildModel()
		
		# training data, loaded once and cached for subsequent calls
		if self.featData is None:
			(featData, outData) = self.prepData("train")
			(self.featData, self.outData) = (featData, outData)
		else:
			(featData, outData) = (self.featData, self.outData)
		
		# parameters
		modelSave = self.config.getBooleanConfig("train.model.save")[0]
		
		#train
		self.logger.info("...training model")
		self.regressor.fit(featData, outData) 
		rsqScore = self.regressor.score(featData, outData)
		coef = self.regressor.coef_
		intc = self.regressor.intercept_
		result = (rsqScore, intc, coef)
		
		if modelSave:
			self.logger.info("...saving model")
			modelFilePath = self.getModelFilePath()
			joblib.dump(self.regressor, modelFilePath) 
		return result
		
	def validate(self):
		"""
		validate model with validation data; returns the tuple 
		(predicted output, score on validation data)
		"""
		# create model
		self.prepModel()	
			
		# prepare test data
		(featData, outDataActual) = self.prepData("validate")
		
		#predict
		self.logger.info("...predicting")
		outDataPred = self.regressor.predict(featData) 
		
		#error
		rsqScore = self.regressor.score(featData, outDataActual)
		result = (outDataPred, rsqScore)
		return result

	def predict(self):
		"""
		predict using trained model; returns the predicted output
		"""
		# create model
		self.prepModel()
		
		# prepare test data, there is no output column in predict mode
		featData = self.prepData("predict")[0]
		
		#predict
		self.logger.info("...predicting")
		outData = self.regressor.predict(featData) 
		return outData

	def prepData(self, mode):
		"""
		loads and prepares data for training, validation and prediction
		
		Parameters
			mode : prefix of the config keys to use i.e. train, validate or predict
		
		Returns the tuple (feature data, output data); output data is None in 
		predict mode
		"""
		# parameters
		key = mode  + ".data.file"
		dataFile = self.config.getStringConfig(key)[0]
		
		key = mode  + ".data.fields"
		fieldIndices = self.config.getStringConfig(key)[0]
		if fieldIndices is not None:
			fieldIndices = strToIntArray(fieldIndices, ",")
		
		key = mode  + ".data.feature.fields"
		featFieldIndices = self.config.getStringConfig(key)[0]
		if featFieldIndices is not None:
			featFieldIndices = strToIntArray(featFieldIndices, ",")
		
		withOutput = mode != "predict"
		if withOutput:	
			key = mode  + ".data.out.field"
			outFieldIndex = self.config.getIntConfig(key)[0]

		#load data
		(data, featData) = loadDataFile(dataFile, ",", fieldIndices, featFieldIndices)
		if self.config.getStringConfig("common.preprocessing")[0] == "scale":
			# explicit import, so that scaling does not depend on the preprocessing
			# sub module having been loaded as a side effect of some other import
			from sklearn.preprocessing import scale
			featData = scale(featData)
		outData = extrColumns(data, outFieldIndex) if withOutput else None
		return (featData, outData)
		
	def prepModel(self):
		"""
		load saved model or train model; with predict.use.saved.model set the
		model is loaded from file only once and then reused, otherwise the model 
		is trained
		"""
		useSavedModel = self.config.getBooleanConfig("predict.use.saved.model")[0]
		if useSavedModel:
			# an already loaded model is reused; falling through to training here
			# would rebuild and overwrite it
			if self.regressor is None:
				self.logger.info("...loading saved model")
				modelFilePath = self.getModelFilePath()
				self.regressor = joblib.load(modelFilePath)
		else:
			# train model
			self.train()
	
class LinearRegressor(BaseRegressor):
	"""
	linear regression, based on scikit-learn LinearRegression
	"""
	def __init__(self, configFile):
		"""
		initializer
		
		Parameters
			configFile : path of the configuration file
		"""
		defValues = {}
		defValues["train.normalize"] = (False, None)	

		super(LinearRegressor, self).__init__(configFile, defValues)

	def buildModel(self):
		"""
		builds model object and stores it in self.regressor
		"""
		self.logger.info("...building linear regression model")
		normalize = self.config.getBooleanConfig("train.normalize")[0]
		
		# the normalize argument was deprecated in scikit-learn 1.0 and removed in 1.2,
		# so it is forwarded only when actually enabled; leaving it out is equivalent
		# to normalize=False on older versions and keeps the default configuration 
		# working on newer ones
		params = {"normalize": normalize} if normalize else {}
		self.regressor = LinearRegression(**params)

class ElasticNetRegressor(BaseRegressor):
	"""
	elastic net regression, based on scikit-learn ElasticNet
	"""
	def __init__(self, configFile):
		"""
		initializer
		
		Parameters
			configFile : path of the configuration file
		"""
		defValues = {}
		defValues["train.alpha"] = (1.0, None)	
		defValues["train.loneratio"] = (0.5, None)
		defValues["train.normalize"] = (False, None)	
		defValues["train.precompute"] = (False, None)	
		defValues["train.max.iter"] = (1000, None)	
		defValues["train.tol"] = (0.0001, None)	
		defValues["train.random.state"] = (None, None)	
		defValues["train.selection"] = ("cyclic", None)	

		super(ElasticNetRegressor, self).__init__(configFile, defValues)

	def buildModel(self):
		"""
		builds model object and stores it in self.regressor
		"""
		# ElasticNet is not among the module level imports, so it is imported here
		from sklearn.linear_model import ElasticNet
		
		self.logger.info("...building elastic net regression model")
		alpha = self.config.getFloatConfig("train.alpha")[0]
		loneratio = self.config.getFloatConfig("train.loneratio")[0]
		normalize = self.config.getBooleanConfig("train.normalize")[0]
		precompute = self.config.getBooleanConfig("train.precompute")[0]
		maxIter = self.config.getIntConfig("train.max.iter")[0]
		tol = self.config.getFloatConfig("train.tol")[0]
		randState = self.config.getIntConfig("train.random.state")[0]
		# selection is a string i.e. cyclic or random, so it is read as a string
		selection = self.config.getStringConfig("train.selection")[0]
		
		params = dict(alpha=alpha, l1_ratio=loneratio, precompute=precompute, max_iter=maxIter, 
		tol=tol, random_state=randState, selection=selection)
		
		# the normalize argument was deprecated in scikit-learn 1.0 and removed in 1.2,
		# so it is forwarded only when actually enabled; leaving it out is equivalent
		# to normalize=False on older versions
		if normalize:
			params["normalize"] = normalize
		self.regressor = ElasticNet(**params)