Parameterized training set vs. dev set distribution
PayscaleNateW committed Nov 4, 2016
1 parent 9c161b5 commit f7cd6c0
Showing 1 changed file with 4 additions and 2 deletions.
train.py: 6 changes (4 additions & 2 deletions)
@@ -27,6 +27,7 @@
 # Misc Parameters
 tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
 tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
+tf.flags.DEFINE_float("dev_sample_percentage", .1, "Percentage of the training data to use for validation")
 
 FLAGS = tf.flags.FLAGS
 FLAGS._parse_flags()
@@ -56,8 +57,9 @@
 
 # Split train/test set
 # TODO: This is very crude, should use cross-validation
-x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:]
-y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]
+dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
+x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
+y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
 print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
 print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
 
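For reference, a minimal sketch of the new split logic in isolation, using a hypothetical dataset size (the actual script derives the size from the loaded labels y):

# Minimal sketch of the parameterized train/dev split (illustrative only).
dev_sample_percentage = 0.1   # default value of the new dev_sample_percentage flag
num_examples = 10000          # hypothetical dataset size
dev_sample_index = -1 * int(dev_sample_percentage * float(num_examples))  # -1000
data = list(range(num_examples))  # stand-in for x_shuffled / y_shuffled
train, dev = data[:dev_sample_index], data[dev_sample_index:]
print(len(train), len(dev))   # 9000 1000

Since tf.flags exposes each definition as a command-line flag, the split size can now be adjusted at launch, for example python train.py --dev_sample_percentage=0.2 (illustrative invocation; only the flag name comes from the diff above).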
