forked from norm4nn/Snus-Solutions
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
47 lines (35 loc) · 1.33 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from typing import List
from sklearn.preprocessing import MinMaxScaler
import torch
from sklearn.preprocessing import LabelEncoder
from array import array
import numpy as np
import pandas as pd
# Load the customer table.  By the end of this section every column has been
# replaced by a min-max scaled numeric column.
data = pd.read_csv("customers.csv")

# Flat lookup: original category label -> integer code.  A single dict is
# shared by all categorical columns, so a label occurring in more than one
# column keeps the code assigned by the column processed last.
mapping_string_to_int = {}
label_encoder = LabelEncoder()
string_fields = ['Gender_x', 'Profession', 'Spending_Score', 'Ever_Married', 'Graduated', 'Var_1']
for categorical_column in string_fields:
    # Keep the raw labels so they can be paired with their codes below.
    raw_labels = data[categorical_column].copy()
    # The same encoder object is refitted for each column in turn.
    data[categorical_column] = label_encoder.fit_transform(data[categorical_column])
    # Row-wise pairing of (raw label, code); later rows overwrite earlier ones.
    mapping_string_to_int.update(zip(raw_labels, data[categorical_column]))

# Scale each column separately and remember the statistics needed to apply
# the same transform to new records.  Entry ``i`` belongs to the i-th column
# of ``data``; because the scaler is fitted on a single column, both stored
# statistics are one-element arrays.
scaler = MinMaxScaler()
mins_and_ranges = []
for column_name in data.columns:
    scaled_column = scaler.fit_transform(data[[column_name]])
    data[column_name] = scaled_column
    column_stats = (scaler.data_min_, scaler.data_range_)
    mins_and_ranges.append(column_stats)
def _scalar(value):
    """Return *value* as a plain float, unwrapping one-element arrays."""
    return float(np.ravel(value)[0])


def map_customer(customer: List, *, mapping=None, scaling=None):
    """Encode and min-max scale one customer record into a ``(1, n)`` tensor.

    The categorical entries of ``customer`` are replaced by their integer
    codes, every entry is then shifted by the column minimum, divided by the
    column range and clamped to ``[0, 0.99]``.

    Args:
        customer: One record as a flat list.  It is not modified.
        mapping: Optional label -> integer code lookup.  Defaults to the
            module-level ``mapping_string_to_int``.
        scaling: Optional sequence of ``(minimum, range)`` pairs, one per
            position of ``customer``; each statistic may be a number or a
            one-element array.  Defaults to the module-level
            ``mins_and_ranges``.

    Returns:
        A ``torch.float32`` tensor of shape ``(1, len(customer))``.

    Raises:
        KeyError: If a categorical entry is not present in ``mapping``.
        IndexError: If ``customer`` is shorter than the highest categorical
            position, or longer than ``scaling``.
    """
    if mapping is None:
        mapping = mapping_string_to_int
    if scaling is None:
        scaling = mins_and_ranges

    # Positions of the categorical entries inside a record.
    # NOTE(review): the module-level ``string_fields`` lists six categorical
    # columns but only five positions are decoded here - confirm the layout.
    strings_fields_idx = (0, 2, 6, 7, 8)

    # Work on a copy: the caller's list must stay usable for a second call.
    values = list(customer)
    for i in strings_fields_idx:
        label = values[i]
        try:
            values[i] = mapping[label]
        except KeyError:
            raise KeyError(
                f"unknown category {label!r} at position {i}"
            ) from None

    scaled = []
    for i, value in enumerate(values):
        minimum = _scalar(scaling[i][0])
        # A constant column has range 0; divide by 1 instead, which is how
        # MinMaxScaler itself treats a zero range.
        span = _scalar(scaling[i][1]) or 1.0
        ratio = (value - minimum) / span
        # The upper bound is 0.99 rather than 1.0; the reason is not evident
        # from this file, so the original bound is kept as-is.
        ratio = min(0.99, ratio)
        ratio = max(0, ratio)
        scaled.append(ratio)

    # Plain Python numbers give torch a uniform, flat sequence to convert.
    return torch.tensor(scaled, dtype=torch.float32).reshape((1, len(scaled)))