Spaces:

ThirdEyeData
/

Customer-Conversion-Prediction

Runtime error

App Files Files Community

Customer-Conversion-Prediction / matumizi /examples /fesel.py

Priyanka-Kumavat-At-TE

Upload 20 files

3d0e51e about 2 years ago

raw

history blame

7.66 kB

	#!/usr/local/bin/python3

	# Author: Pranab Ghosh
	#
	# Licensed under the Apache License, Version 2.0 (the "License"); you
	# may not use this file except in compliance with the License. You may
	# obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
	# implied. See the License for the specific language governing
	# permissions and limitations under the License.

	# Package imports
	import os
	import sys
	import random
	import statistics
	import matplotlib.pyplot as plt
	import argparse
	from matumizi.util import *
	from matumizi.mlutil import *
	from matumizi.daexp import *
	from matumizi.sampler import *

	# number of base features sampled per loan record (feature indexes 0 - 10 in LoanApprove.initTwo)
	NFEAT = 11
	# number of features when the 3 extra features (indexes 11 - 13) are also generated
	NFEAT_EXT = 14

	class LoanApprove:
		"""
		Synthetic loan approval data generator and dummy variable encoder.

		Records are generated by first sampling a class label ("1" or "0", presumably
		approved / not approved) and then sampling every feature from a distribution
		conditioned on that label. Records are written to standard output as CSV.
		"""

		def __init__(self, numLoans=None):
			"""
			initializer

			Parameters
				numLoans : number of loan records to generate, required only by generateTwo()
			"""
			self.numLoans = numLoans
			self.marStatus = ["married", "single", "divorced"]
			self.loanTerm = ["7", "15", "30"]
			self.addExtra = False

		def initTwo(self):
			"""
			initialize samplers

			Sets self.approvDistr (class label sampler) and self.featCondDister, a dictionary
			of feature samplers keyed by (class label, feature index). The extra features
			(indexes 11 - 13) are included only when self.addExtra is set.
			"""
			self.approvDistr = CategoricalRejectSampler(("1", 60), ("0", 40))

			# samplers are created in feature index order, class "1" before class "0"
			condDistr = {
				# marital status
				("1", 0) : CategoricalRejectSampler(("married", 100), ("single", 60), ("divorced", 40)),
				("0", 0) : CategoricalRejectSampler(("married", 40), ("single", 100), ("divorced", 40)),

				# num of children
				("1", 1) : CategoricalRejectSampler(("1", 100), ("2", 90), ("3", 40)),
				("0", 1) : CategoricalRejectSampler(("1", 50), ("2", 70), ("3", 100)),

				# education
				("1", 2) : CategoricalRejectSampler(("1", 30), ("2", 80), ("3", 100)),
				("0", 2) : CategoricalRejectSampler(("1", 100), ("2", 40), ("3", 30)),

				# self employed
				("1", 3) : CategoricalRejectSampler(("1", 40), ("0", 100)),
				("0", 3) : CategoricalRejectSampler(("1", 100), ("0", 30)),

				# income
				("1", 4) : GaussianRejectSampler(120,15),
				("0", 4) : GaussianRejectSampler(50,10),

				# years of experience
				("1", 5) : GaussianRejectSampler(15,3),
				("0", 5) : GaussianRejectSampler(5,1),

				# number of years in current job
				("1", 6) : GaussianRejectSampler(3,.5),
				("0", 6) : GaussianRejectSampler(1,.2),

				# outstanding debt
				("1", 7) : GaussianRejectSampler(20,5),
				("0", 7) : GaussianRejectSampler(60,10),

				# loan amount
				("1", 8) : GaussianRejectSampler(300,50),
				("0", 8) : GaussianRejectSampler(600,50),

				# loan term
				("1", 9) : CategoricalRejectSampler(("7", 100), ("15", 40), ("30", 60)),
				("0", 9) : CategoricalRejectSampler(("7", 30), ("15", 100), ("30", 60)),

				# credit score
				("1", 10) : GaussianRejectSampler(700,20),
				("0", 10) : GaussianRejectSampler(500,50),
			}

			if self.addExtra:
				# saving
				condDistr[("1", 11)] = NormalSampler(80,10)
				condDistr[("0", 11)] = NormalSampler(60,8)

				# retirement, a mixture of a zero valued component and a non zero component,
				# the zero valued component is shared by both classes
				zDistr = NormalSampler(0, 0)
				sDistr = DiscreteRejectSampler(0,1,1,20,80)
				nzDistr = NormalSampler(100,20)
				condDistr[("1", 12)] = DistrMixtureSampler(sDistr, zDistr, nzDistr)
				sDistr = DiscreteRejectSampler(0,1,1,50,50)
				nzDistr = NormalSampler(40,10)
				condDistr[("0", 12)] = DistrMixtureSampler(sDistr, zDistr, nzDistr)

				# num of prior mortgage loans
				condDistr[("1", 13)] = DiscreteRejectSampler(0,3,1,20,60,40,15)
				condDistr[("0", 13)] = DiscreteRejectSampler(0,1,1,70,30)

			self.featCondDister = condDistr

		def generateTwo(self, noise, keyLen, addExtra):
			"""
			ancestral sampling

			Prints self.numLoans records to standard output, each formatted as
			ID,feature values,class label.

			Parameters
				noise : noise level passed to addNoiseCat() for the class label
				keyLen : record ID length passed to genID()
				addExtra : True to also generate the 3 extra features

			Raises
				ValueError : if the number of loans was not provided to the constructor
			"""
			self.addExtra = addExtra
			if self.numLoans is None:
				# fail before any sampler is built instead of a late TypeError from range(None)
				raise ValueError("number of loans must be provided to generate data")
			self.initTwo()

			#sampler
			numChildren = NFEAT_EXT if self.addExtra else NFEAT
			sampler = AncestralSampler(self.approvDistr, self.featCondDister, numChildren)

			# features truncated to integer: income, debt, loan amount, credit score
			# and, with extra features, saving and retirement
			intFeatures = [4, 7, 8, 10]
			if self.addExtra:
				intFeatures += [11, 12]

			for _ in range(self.numLoans):
				(claz, features) = sampler.sample()
				for fi in intFeatures:
					features[fi] = int(features[fi])

				# noise is applied to the class label only
				claz = addNoiseCat(claz, ["0", "1"], noise)

				strFeatures = [toStr(f, 2) for f in features]
				rec = ",".join([genID(keyLen)] + strFeatures + [claz])
				print(rec)

		def encodeDummy(self, fileName, extra):
			"""
			dummy var encoding

			Reads records from a file and prints each record with the categorical columns
			replaced by dummy variables.

			Parameters
				fileName : path of the file containing generated records
				extra : True if the file contains the 3 extra features
			"""
			# column index to category values, column 0 is the record ID, so a column
			# index is the feature index plus 1 (marital status and loan term)
			catVars = {1 : self.marStatus, 10 : self.loanTerm}

			# record size is the feature count plus the ID and class label columns
			rSize = (NFEAT_EXT if extra else NFEAT) + 2
			dummyVarGen = DummyVarGenerator(rSize, catVars, "1", "0", ",")
			for row in fileRecGen(fileName, None):
				print(dummyVarGen.processRow(row))

	if __name__ == "__main__":
		# command line driver, the operation is selected with --op
		#   gen  : generate loan records with the extra features
		#   encd : dummy variable encode a generated file
		#   fsel : feature selection with the algorithm given by --algo
		parser = argparse.ArgumentParser()
		parser.add_argument('--op', type=str, default = "none", help = "operation")
		parser.add_argument('--nloan', type=int, default = 1000, help = "num of loans")
		parser.add_argument('--noise', type=float, default = 0.1, help = "class label noise level")
		parser.add_argument('--klen', type=int, default = 1000, help = "key length")
		parser.add_argument('--fpath', type=str, default = "none", help = "source file path")
		parser.add_argument('--algo', type=str, default = "none", help = "feature selection algorithm (mrmr, jmi, cmim, icap or infg)")
		args = parser.parse_args()
		op = args.op

		if op == "gen":
			# generate data
			numLoans = args.nloan
			loan = LoanApprove(numLoans)
			noise = args.noise
			keyLen = args.klen
			addExtra = True
			loan.generateTwo(noise, keyLen, addExtra)

		elif op == "encd":
			# encode binary
			fileName = args.fpath
			extra = True
			loan = LoanApprove()
			loan.encodeDummy(fileName, extra)

		elif op == "fsel":
			# feature select
			fpath = args.fpath
			algo = args.algo
			expl = DataExplorer(False)

			# column indexes refer to a generated, un-encoded file with the extra features,
			# where column 0 is the record ID and column 15 is the class label
			expl.addFileNumericData(fpath, 5, 8, 11, 12, "income", "debt", "crscore", "saving")
			expl.addFileCatData(fpath, 3, 4, 15, "education", "selfemp", "target")

			# alternating feature name and feature type
			fdt = ["education", "cat", "selfemp", "cat", "income", "num", "debt", "num", "crscore", "num"]
			tdt = ["target", "cat"]

			res = None
			if algo == "mrmr":
				res = expl.getMaxRelMinRedFeatures(fdt, tdt, 3)
			elif algo == "jmi":
				res = expl.getJointMutInfoFeatures(fdt, tdt, 3)
			elif algo == "cmim":
				res = expl.getCondMutInfoMaxFeatures(fdt, tdt, 3)
			elif algo == "icap":
				res = expl.getInteractCapFeatures(fdt, tdt, 3)
			elif algo == "infg":
				res = expl.getInfoGainFeatures(fdt, tdt, 3, 8)
			else:
				# an unknown algorithm used to leave res unbound and fail with NameError
				# NOTE(review): assumes exitWithMsg terminates the process - confirm
				exitWithMsg("invalid feature selection algorithm " + algo)

			print(res)
		else:
			exitWithMsg("invalid command")