Commit cec5ae5e by Tristan Kreuziger

Updated repository to handle BigEarthNet-19 as well

parent 063ae455
@@ -41,20 +41,26 @@ BAND_STATS = {
}

class BigEarthNet:
-    def __init__(self, TFRecord_paths, batch_size, nb_epoch, shuffle_buffer_size):
+    def __init__(self, TFRecord_paths, batch_size, nb_epoch, shuffle_buffer_size, label_type):
+        self.label_type = label_type
        dataset = tf.data.TFRecordDataset(TFRecord_paths)
        if shuffle_buffer_size > 0:
            dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
        dataset = dataset.repeat(nb_epoch)
        dataset = dataset.map(
-            self.parse_function, num_parallel_calls=10)
+            lambda x: self.parse_function(x, self.label_type),
+            num_parallel_calls=10
+        )
        dataset = dataset.batch(batch_size, drop_remainder=False)
        self.dataset = dataset.prefetch(10)
        self.batch_iterator = self.dataset.make_one_shot_iterator()

-    def parse_function(self, example_proto):
+    def parse_function(self, example_proto, label_type):
+        nb_class = 43 if label_type == 'original' else 19
        parsed_features = tf.parse_single_example(
            example_proto,
            {
@@ -70,9 +76,9 @@ class BigEarthNet:
                'B09': tf.FixedLenFeature([20*20], tf.int64),
                'B11': tf.FixedLenFeature([60*60], tf.int64),
                'B12': tf.FixedLenFeature([60*60], tf.int64),
-                'original_labels': tf.VarLenFeature(dtype=tf.string),
-                'original_labels_multi_hot': tf.FixedLenFeature([43], tf.int64),
-                'patch_name': tf.VarLenFeature(dtype=tf.string)
+                'patch_name': tf.VarLenFeature(dtype=tf.string),
+                label_type + '_labels': tf.VarLenFeature(dtype=tf.string),
+                label_type + '_labels_multi_hot': tf.FixedLenFeature([nb_class], tf.int64)
            }
        )
@@ -89,7 +95,7 @@ class BigEarthNet:
            'B09': tf.reshape(parsed_features['B09'], [20, 20]),
            'B11': tf.reshape(parsed_features['B11'], [60, 60]),
            'B12': tf.reshape(parsed_features['B12'], [60, 60]),
-            'original_labels_multi_hot': parsed_features['original_labels_multi_hot'],
-            'original_labels': parsed_features['original_labels'],
-            'patch_name': parsed_features['patch_name']
+            'patch_name': parsed_features['patch_name'],
+            label_type + '_labels': parsed_features[label_type + '_labels'],
+            label_type + '_labels_multi_hot': parsed_features[label_type + '_labels_multi_hot']
        }
\ No newline at end of file
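For orientation, here is a minimal usage sketch of the updated class (a sketch assuming TF 1.x, that the `BigEarthNet` class above is importable, and hypothetical TFRecord paths; it is illustrative and not part of the commit):

```python
import tensorflow as tf

# Hypothetical TFRecord paths, produced beforehand by the prep_splits scripts
dataset = BigEarthNet(
    TFRecord_paths=['train_0.tfrecord', 'train_1.tfrecord'],
    batch_size=32,
    nb_epoch=1,
    shuffle_buffer_size=1000,
    label_type='BigEarthNet-19'  # or 'original' for the 43-class nomenclature
)
batch = dataset.batch_iterator.get_next()

with tf.Session() as sess:
    first_batch = sess.run(batch)
    # 19 classes for BigEarthNet-19 (43 for 'original')
    print(first_batch['BigEarthNet-19_labels_multi_hot'].shape)  # (32, 19)
```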
# BigEarthNet Deep Learning Models With TensorFlow
-This repository contains code to use deep learning models, pre-trained on the [BigEarthNet archive](http://bigearth.net/) with TensorFlow, to train new models, and to evaluate pre-trained models. It is recommended to first check the [BigEarthNet Deep Learning Models repository](https://gitlab.tu-berlin.de/rsim/bigearthnet-models).
+This repository contains code to use deep learning models pre-trained on the [BigEarthNet archive](http://bigearth.net/) with TensorFlow, to train new models, and to evaluate pre-trained models. Note that in addition to the original BigEarthNet labels (referred to as `original` in the code), there is a new class nomenclature (referred to as `BigEarthNet-19` in the code). This repository is compatible with both options.
+* For the original BigEarthNet labels, it is highly recommended to first check the [BigEarthNet Deep Learning Models repository](https://gitlab.tu-berlin.de/rsim/bigearthnet-models).
+* For the BigEarthNet-19 labels, it is highly recommended to first check the [BigEarthNet-19 Deep Learning Models repository](https://gitlab.tu-berlin.de/rsim/bigearthnet-19-models).
## Prerequisites
* The definitions of the ResNet and VGG models are based on their TensorFlow-Slim implementations. Thus, you first need to download the `nets` folder of the [TensorFlow-Slim repository](https://github.com/tensorflow/models/tree/master/research/slim/nets) to the root folder.
-* The `prep_splits.py` script from [here](https://gitlab.tu-berlin.de/rsim/bigearthnet-models/blob/master/prep_splits.py) generates `TFRecord` files for train, validation and test sets from the BigEarthNet archive. To train or evaluate any model, required TFRecord files should be first prepared.
+* The `prep_splits.py` script from [here](https://gitlab.tu-berlin.de/rsim/bigearthnet-models/blob/master/prep_splits.py) for the original labels and the `prep_splits_BigEarthNet-19.py` script from [here](https://gitlab.tu-berlin.de/rsim/bigearthnet-19-models/blob/master/prep_splits.py) for the BigEarthNet-19 labels generate `TFRecord` files for the train, validation, and test sets from the BigEarthNet archive. To train or evaluate any model, the required `TFRecord` files must be prepared first.
* The config file used to train each model is provided under the `configs` folder. If you want to use these config files, you need to download the pre-trained model weights and move them to the folders whose paths are written in the corresponding JSON file.
* The TensorFlow package must be installed. All code was tested with Python 2.7, TensorFlow 1.3, and Ubuntu 16.04.
## Training
The script `train.py` expects the path of a `JSON` configuration file as a command line argument. This file contains the following parameters:
* `model_name`: The name of the Python module containing the corresponding deep learning model. The code must be located under the `models` directory. The model class is loaded dynamically based on the `model_name` parameter: `model = importlib.import_module('models.' + args['model_name']).dnn_model(nb_class)` (see the sketch after this list)
* `label_type`: A flag to indicate which labels will be used during training: `original` or `BigEarthNet-19`
* `batch_size`: Batch size used during training
* `nb_epoch`: The number of epochs for the training
* `learning_rate`: The initial learning rate
* `out_dir`: The path where all log files and checkpoints will be saved.
* `save_checkpoint_after_iteration`: The iteration after which checkpoint saving should start, i.e., no checkpoints are saved before. Set to zero to always have checkpoints saved.
* `save_checkpoint_per_iteration`: A checkpoint is written every this many iterations, i.e., whenever `iteration_index % save_checkpoint_per_iteration == 0`.
-* `tr_tf_record_files`: An array containing `TFRecord` files for training.
-* `val_tf_record_files`: An array containing `TFRecord` files for validation (not used for now).
+* `tr_tf_record_files`: An array containing `TFRecord` file(s) for training.
+* `val_tf_record_files`: An array containing `TFRecord` file(s) for validation (not used for now).
* `fine_tune`: A flag indicating whether training should continue from an existing checkpoint, whose path is defined by `pretrained_model_path`.
* `model_file`: The base name of a pre-trained model snapshot (i.e., checkpoint).
* `shuffle_buffer_size`: The number of elements that are shuffled at the beginning of each epoch. A large shuffle buffer is not recommended if you do not have enough memory.
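To make the training entry point concrete, here is a minimal sketch of how `train.py` might consume such a configuration file (the `importlib` call is quoted from the `model_name` description above; the JSON loading around it is an assumption, not necessarily the repository's exact code):

```python
import importlib
import json
import sys

# Hypothetical loader: the JSON config path is passed as a command line argument
with open(sys.argv[1], 'r') as config_file:
    args = json.load(config_file)

# 19 classes for the BigEarthNet-19 nomenclature, 43 for the original labels
nb_class = 19 if args['label_type'] == 'BigEarthNet-19' else 43

# Dynamic model loading, as quoted above
model = importlib.import_module('models.' + args['model_name']).dnn_model(nb_class)
```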
......
{
    "model_name": "K-BranchCNN",
    "batch_size": 1000,
    "nb_epoch": 100,
    "learning_rate": 1e-3,
    "save_checkpoint_per_iteration": 300,
    "save_checkpoint_after_iteration": 3000,
    "out_dir": "model_weights/BigEarthNet-19_labels/K-BranchCNN",
    "model_file": "model_weights/BigEarthNet-19_labels/K-BranchCNN/K-BranchCNN_BigEarthNet-19_labels",
    "label_type": "BigEarthNet-19"
}
\ No newline at end of file
{
    "model_name": "ResNet101",
    "batch_size": 500,
    "nb_epoch": 100,
    "learning_rate": 1e-3,
    "save_checkpoint_per_iteration": 600,
    "save_checkpoint_after_iteration": 6000,
    "out_dir": "model_weights/BigEarthNet-19_labels/ResNet101",
    "model_file": "model_weights/BigEarthNet-19_labels/ResNet101/ResNet101_BigEarthNet-19_labels",
    "label_type": "BigEarthNet-19"
}
\ No newline at end of file
{
    "model_name": "ResNet152",
    "batch_size": 256,
    "nb_epoch": 100,
    "learning_rate": 1e-3,
    "save_checkpoint_per_iteration": 300,
    "save_checkpoint_after_iteration": 3000,
    "out_dir": "model_weights/BigEarthNet-19_labels/ResNet152",
    "model_file": "model_weights/BigEarthNet-19_labels/ResNet152/ResNet152_BigEarthNet-19_labels",
    "label_type": "BigEarthNet-19"
}
\ No newline at end of file
{
    "model_name": "ResNet50",
    "batch_size": 500,
    "nb_epoch": 100,
    "learning_rate": 1e-3,
    "save_checkpoint_per_iteration": 600,
    "save_checkpoint_after_iteration": 6000,
    "out_dir": "model_weights/BigEarthNet-19_labels/ResNet50",
    "model_file": "model_weights/BigEarthNet-19_labels/ResNet50/ResNet50_BigEarthNet-19_labels",
    "label_type": "BigEarthNet-19"
}
\ No newline at end of file
{
    "model_name": "VGG16",
    "batch_size": 1000,
    "nb_epoch": 100,
    "learning_rate": 1e-3,
    "save_checkpoint_per_iteration": 300,
    "save_checkpoint_after_iteration": 3000,
    "out_dir": "model_weights/BigEarthNet-19_labels/VGG16",
    "model_file": "model_weights/BigEarthNet-19_labels/VGG16/VGG16_BigEarthNet-19_labels",
    "label_type": "BigEarthNet-19"
}
\ No newline at end of file
{
    "model_name": "VGG19",
    "batch_size": 1000,
    "nb_epoch": 100,
    "learning_rate": 1e-3,
    "save_checkpoint_per_iteration": 300,
    "save_checkpoint_after_iteration": 3000,
    "out_dir": "model_weights/BigEarthNet-19_labels/VGG19",
    "model_file": "model_weights/BigEarthNet-19_labels/VGG19/VGG19_BigEarthNet-19_labels",
    "label_type": "BigEarthNet-19"
}
\ No newline at end of file
@@ -28,7 +28,8 @@ def eval_model(args):
        args['test_tf_record_files'],
        args['batch_size'],
        1,
-        0
+        0,
+        args['label_type']
    ).batch_iterator
    nb_iteration = int(np.ceil(float(args['test_size']) / args['batch_size']))
    iterator_ins = iterator.get_next()
......
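As a quick sanity check with hypothetical numbers: for `test_size = 10000` and `batch_size = 500`, `nb_iteration = int(np.ceil(10000.0 / 500)) = 20`; and since the dataset is batched with `drop_remainder=False`, a final smaller batch would still be evaluated when `test_size` is not a multiple of `batch_size`.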
+You can find links to the pre-trained model weights trained with the BigEarthNet-19 multi-labels, associated with the new class nomenclature, on the [BigEarthNet-19 Deep Learning Models repository](https://gitlab.tu-berlin.de/rsim/bigearthnet-19-models). After downloading the zip files, you can extract each one directly here; the resulting layout is compatible with the existing configuration JSON files.
\ No newline at end of file
-You can find the links of pre-trained model weights on the [BigEarthNet Deep Models repository](https://gitlab.tu-berlin.de/rsim/bigearthnet-models). After downloading the zip files, you can directly extract each file to here that is compatible with existing configuration JSON files.
\ No newline at end of file
+You can find links to the pre-trained model weights trained with the original BigEarthNet multi-labels, associated with the Level-3 class nomenclature of CLC 2018, on the [BigEarthNet Deep Learning Models repository](https://gitlab.tu-berlin.de/rsim/bigearthnet-models). After downloading the zip files, you can extract each one directly here; the resulting layout is compatible with the existing configuration JSON files.
\ No newline at end of file
@@ -118,12 +118,25 @@ class DNN_model(Model):
            feature = self.dropout(out, 0.5, is_training, 'dropout_3')
            return feature

+    def branch_model_60m(self, inputs, is_training):
+        with tf.variable_scope('CNN_60m_branch'):
+            out = self.conv_block(inputs, 32, [2,2], is_training, 'conv_block_0')
+            out = self.dropout(out, 0.25, is_training, 'dropout_0')
+            out = self.conv_block(out, 32, [2,2], is_training, 'conv_block_1')
+            out = self.dropout(out, 0.25, is_training, 'dropout_1')
+            out = self.conv_block(out, 32, [2,2], is_training, 'conv_block_2')
+            out = self.dropout(out, 0.25, is_training, 'dropout_2')
+            out = tf.contrib.layers.flatten(out)
+            out = self.fully_connected_block(out, self.feature_size, is_training, 'fc_block_0')
+            feature = self.dropout(out, 0.5, is_training, 'dropout_3')
+            return feature

    def create_network(self):
        branch_features = []
        for img_bands, nb_bands, branch_model in zip(
-            [self.bands_10m, self.bands_20m],
-            [self.nb_bands_10m, self.nb_bands_20m],
-            [self.branch_model_10m, self.branch_model_20m]
+            [self.bands_10m, self.bands_20m, self.bands_60m],
+            [self.nb_bands_10m, self.nb_bands_20m, self.nb_bands_60m],
+            [self.branch_model_10m, self.branch_model_20m, self.branch_model_60m]
        ):
            branch_features.append(tf.reshape(branch_model(img_bands, self.is_training), [-1, self.feature_size]))
......
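For intuition, each branch maps one resolution group (10m, 20m, and now 60m bands) to a `feature_size`-dimensional vector collected in `branch_features`. A common way to fuse such per-resolution features is concatenation; a sketch under that assumption, not necessarily this model's exact fusion code:

```python
# Hypothetical fusion step: concatenate the 10m, 20m, and 60m branch outputs
# into a single [batch, 3 * feature_size] vector for the classification head
fused_features = tf.concat(branch_features, axis=1)
```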
This folder contains implementations of several well-known model architectures. Please note that to use the ResNet and VGG models, you need to download the corresponding folder of the TensorFlow-Slim image classification model library.
\ No newline at end of file
@@ -19,7 +19,7 @@ class Model:
        self.label_type = label_type
        self.prediction_threshold = 0.5
        self.is_training = tf.placeholder(tf.bool, [])
-        self.nb_class = 19 if label_type == 'compact' else 43
+        self.nb_class = 19 if label_type == 'BigEarthNet-19' else 43
        self.B01 = tf.placeholder(tf.float32, [None, 20, 20], name='B01')
        self.B02 = tf.placeholder(tf.float32, [None, 120, 120], name='B02')
        self.B03 = tf.placeholder(tf.float32, [None, 120, 120], name='B03')
@@ -64,13 +64,13 @@ class Model:
        multi_hot_label = batch_dict[
            'original_labels_multi_hot'
        ].astype(np.float) if self.label_type == 'original' else batch_dict[
-            'compact_labels_multi_hot'
+            'BigEarthNet-19_labels_multi_hot'
        ].astype(np.float)

        # Label and patch names can be read in the following way:
        #
        # original_labels = sparse_to_dense(batch_dict['original_labels'].indices, batch_dict['original_labels'].values)
-        # compact_labels = sparse_to_dense(batch_dict['compact_labels'].indices, batch_dict['compact_labels'].values)
+        # bigearthnet19_labels = sparse_to_dense(batch_dict['BigEarthNet-19_labels'].indices, batch_dict['BigEarthNet-19_labels'].values)
        # patch_name = sparse_to_dense(batch_dict['patch_name'].indices, batch_dict['patch_name'].values)
        return {
......
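Given the `prediction_threshold = 0.5` set above, a typical multi-label prediction step would binarize per-class scores as follows (a sketch assuming sigmoid outputs named `probabilities`; this is hypothetical and not quoted from the repository):

```python
# Hypothetical: turn per-class sigmoid probabilities into 0/1 multi-label predictions
predictions = tf.cast(probabilities >= self.prediction_threshold, tf.float32)
```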
@@ -38,7 +38,8 @@ def run_model(args):
        args['tr_tf_record_files'],
        args['batch_size'],
        args['nb_epoch'],
-        args['shuffle_buffer_size']
+        args['shuffle_buffer_size'],
+        args['label_type']
    ).batch_iterator
    nb_iteration = int(np.ceil(float(args['training_size'] * args['nb_epoch']) / args['batch_size']))
    iterator_ins = iterator.get_next()
......
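Analogously to the evaluation script, with hypothetical values `training_size = 10000`, `nb_epoch = 100`, and `batch_size = 500`, this yields `nb_iteration = int(np.ceil(10000.0 * 100 / 500)) = 2000` training iterations.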