Mark input types and build a feature map

parent 6f173615
@@ -9,7 +9,36 @@ from tensorflow.keras.layers.experimental import preprocessing
dataframe = pd.read_csv('Database_Final.csv')
print(dataframe.head())
dataframe['target'] = dataframe['Remaining space']
dataframe = dataframe.drop(columns=['Unnamed: 0','Remaining space'])
# dataframe.head()
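# Hold out 20% of the rows for testing, then 20% of the remainder for
# validation, giving roughly a 64/16/20 train/validation/test split.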
train, test = train_test_split(dataframe, test_size=0.2)
train, val = train_test_split(train, test_size=0.2)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')
def df_to_dataset(dataframe, label_column, shuffle=True, batch_size=32):
    # Convert a DataFrame into a tf.data.Dataset of (feature-dict, label) batches.
    dataframe = dataframe.copy()
    labels = dataframe.pop(label_column)
    ds = tf.data.Dataset.from_tensor_slices((dataframe.to_dict(orient='list'), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    ds = ds.batch(batch_size)
    return ds
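# Each element of the returned Dataset is a (features, labels) batch, where
# features is a dict keyed by column name; this is what lets the named Keras
# Input layers defined below consume each column directly.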
batch_size = 5
train_ds = df_to_dataset(train, 'target', batch_size=batch_size)
[(train_features, label_batch)] = train_ds.take(1)
print('Every feature:', list(train_features.keys()))
print('A batch of IDs:', train_features['ID'])
print('A batch of targets:', label_batch)
def get_normalization_layer(name, dataset):
    # Create a Normalization layer for our feature.
@@ -23,7 +52,42 @@ def get_normalization_layer(name, dataset):
    return normalizer
def get_category_encoding_layer(name, dataset, dtype, max_tokens=None):
    # Create a lookup layer which will turn raw values into integer indices.
    if dtype == 'string':
        index = preprocessing.StringLookup(max_tokens=max_tokens)
    else:
        index = preprocessing.IntegerLookup(max_tokens=max_tokens)
    # Prepare a Dataset that only yields our feature.
    feature_ds = dataset.map(lambda x, y: x[name])
    # Learn the set of possible values and assign them a fixed integer index.
    index.adapt(feature_ds)
    # Create a CategoryEncoding layer to one-hot encode the integer indices.
    encoder = preprocessing.CategoryEncoding(num_tokens=index.vocabulary_size())
    # Apply one-hot encoding to the indices. The lambda captures the lookup and
    # encoder layers so they can be reused or included in the functional model later.
    return lambda feature: encoder(index(feature))
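# A quick sanity-check sketch (commented out; 'Sunny' is a hypothetical value,
# and any string not seen during adapt() simply falls into the OOV bucket),
# showing how the returned callable maps raw strings straight to one-hot vectors:
#   weather_encoder = get_category_encoding_layer('Weather', train_ds, 'string', max_tokens=5)
#   print(weather_encoder(tf.constant([['Sunny']])))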
batch_size = 256
train_ds = df_to_dataset(train, 'target', batch_size=batch_size)
val_ds = df_to_dataset(val, 'target', shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, 'target', shuffle=False, batch_size=batch_size)
all_inputs = []
encoded_features = []
# Numeric features.
for header in ['Entry', 'Exit']:
    numeric_col = tf.keras.Input(shape=(1,), name=header)
    normalization_layer = get_normalization_layer(header, train_ds)
    encoded_numeric_col = normalization_layer(numeric_col)
    all_inputs.append(numeric_col)
    encoded_features.append(encoded_numeric_col)
categorical_cols = ['Date', 'Time', 'Weather', 'Parking']
for header in categorical_cols:
    categorical_col = tf.keras.Input(shape=(1,), name=header, dtype='string')
@@ -31,4 +95,21 @@ for header in categorical_cols:
                                                 max_tokens=5)
    encoded_categorical_col = encoding_layer(categorical_col)
    all_inputs.append(categorical_col)
    encoded_features.append(encoded_categorical_col)
all_features = tf.keras.layers.concatenate(encoded_features)
x = tf.keras.layers.Dense(64, activation="relu")(all_features)
x = tf.keras.layers.Dropout(0.5)(x)
output = tf.keras.layers.Dense(1)(x)
model = tf.keras.Model(all_inputs, output)
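# The model concatenates every encoded feature into a single vector, applies one
# hidden layer with dropout, and emits a single unactivated logit; the loss
# below is therefore constructed with from_logits=True.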
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=["accuracy"])
# rankdir='LR' is used to make the graph horizontal.
# tf.keras.utils.plot_model(model, show_shapes=True, rankdir="LR")
model.fit(train_ds, epochs=1, validation_data=val_ds)
loss, accuracy = model.evaluate(test_ds)
print("Accuracy", accuracy*100, '%')