change to python3

andy · andy · commit 01c12f564406 · 2017-10-12T15:42:56.000+08:00
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # LSTM + CTC + Tensorflow Example
 
-This is a demo using lstm and ctc to recognize a picture of  a series numbers with blanks all at once.
+This is a demo using lstm and ctc to recognize a picture of a series of numbers with blanks all at once. The code is compatible with Python 3.
 
 For example:given the piture below the model would give result `73791096754314441539`.
 
@@ -9,23 +9,31 @@ For example:given the piture below the model would give result `7379109675431444
 
 ## Installation
 ```
+# on mac
+pip install pillow
 pip install opencv-python
 brew install cmake
 brew tap homebrew/science
 brew install opencv
 sh ./prepare_train_data.sh
 ```
+
+```
+# on ubuntu
+pip install pillow
+pip install opencv-python
+pip install tensorflow-gpu
+```
+
+
 The `prepare_train_data.sh` script would download the [SUN database](http://vision.princeton.edu/projects/2010/SUN/SUN397.tar.gz) and extract the pitures to bgs dir. Then you can run `python gen.py` to generate test and train dir.
 
 When the train and test data set are ready you can start the train process by `nohup python lstm_and_ctc_ocr_train.py `.
 
 ## Requirements
 
-- Python 2.7+
+- Python 2.7+ / Python 3.5+
 - Tensorflow 1.0+
-- python_speech_features
-- numpy
-- scipy
 
 ##
 ## License
diff --git a/common.py b/common.py
@@ -32,50 +32,52 @@
 import time
 
 SPACE_INDEX = 0
-FIRST_INDEX = ord('0') - 1  # 0 is reserved to space
+# FIRST_INDEX = ord('0') - 1  # 0 is reserved to space
+FIRST_INDEX = 1  # 0 is reserved to space
 
 SPACE_TOKEN = '<space>'
 
 __all__ = (
     'DIGITS',
     'sigmoid',
     'softmax',
+    'CHARS'
 )
 
-OUTPUT_SHAPE = (64, 256)
 
 DIGITS = "0123456789"
-# LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 
+CHARS = list(DIGITS + LETTERS)
 
-CHARS = DIGITS
-LENGTH = 16
-LENGTHS = [16, 20] # the number of digits varies from LENGTHS[0] to LENGTHS[1] in a image
-TEST_SIZE = 200
-ADD_BLANK = True   # if add a blank between digits
+LENGTHS = [6, 6]  # the number of digits varies from LENGTHS[0] to LENGTHS[1] in a image
+TEST_SIZE = 100
+ADD_BLANK = True  # if add a blank between digits
 LEARNING_RATE_DECAY_FACTOR = 0.9  # The learning rate decay factor
 INITIAL_LEARNING_RATE = 1e-3
 DECAY_STEPS = 5000
 
 # parameters for bdlstm ctc
 BATCH_SIZE = 64
-BATCHES = 10
+BATCHES = 100
+
+OUTPUT_SHAPE = (BATCH_SIZE, 256)
 
 TRAIN_SIZE = BATCH_SIZE * BATCHES
 
 MOMENTUM = 0.9
-REPORT_STEPS = 100
+REPORT_STEPS = 1000
 
 # Hyper-parameters
-num_epochs = 200
-num_hidden = 64
-num_layers = 1
+num_epochs = 2000
+num_hidden = 128
+num_layers = 2
 
 # Some configs
 # Accounting the 0th indice +  space + blank label = 28 characters
 # num_classes = ord('9') - ord('0') + 1 + 1 + 1
-num_classes = len(DIGITS) + 1 + 1  # 10 digits + blank + ctc blank
-print num_classes
+num_classes = len(CHARS) + 1 + 1  # len(CHARS) symbols (digits + letters) + space + ctc blank
+print(num_classes)
 
 
 def softmax(a):
@@ -96,8 +98,8 @@ def sigmoid(a):
 def load_data_set(dirname):
     fname_list = glob.glob(dirname + "/*.png")
     result = dict()
+    print("loading", dirname)
     for fname in sorted(fname_list):
-        print "loading", fname
         im = cv2.imread(fname)[:, :, 0].astype(numpy.float32) / 255.
         code = list(fname.split("/")[1].split("_")[1])
         index = fname.split("/")[1].split("_")[0]
@@ -108,7 +110,7 @@ def load_data_set(dirname):
 def read_data_for_lstm_ctc(dirname, start_index=None, end_index=None):
     start = time.time()
     fname_list = []
-    if not data_set.has_key(dirname):
+    if dirname not in data_set.keys():
         load_data_set(dirname)
 
     if start_index is None:
@@ -127,12 +129,16 @@ def read_data_for_lstm_ctc(dirname, start_index=None, end_index=None):
         # im = cv2.imread(fname)[:, :, 0].astype(numpy.float32) / 255.
         # code = list(fname.split("/")[1].split("_")[1])
         im, code = dir_data_set.get(fname)
-        yield im, numpy.asarray([SPACE_INDEX if x == SPACE_TOKEN else (ord(x) - FIRST_INDEX) for x in list(code)])
+        yield im, numpy.asarray(
+            [SPACE_INDEX if x == SPACE_TOKEN else (CHARS.index(x) + FIRST_INDEX) for x in list(code)])
         # print("get time ", time.time() - start)
 
 
+#        print numpy.asarray([SPACE_INDEX if x == SPACE_TOKEN else (CHARS.index(x) + FIRST_INDEX) for x in list(code)])
+
+
 def convert_original_code_train_code(code):
-    return numpy.asarray([SPACE_INDEX if x == SPACE_TOKEN else (ord(x) - FIRST_INDEX) for x in code])
+    return numpy.asarray([SPACE_INDEX if x == SPACE_TOKEN else (CHARS.index(x) + FIRST_INDEX) for x in code])  # label = position in CHARS shifted up by FIRST_INDEX, because 0 is reserved for the space token (same mapping as read_data_for_lstm_ctc)
 
 
 def unzip(b):
@@ -144,9 +150,9 @@ def unzip(b):
 
 if __name__ == '__main__':
     train_inputs, train_codes = unzip(list(read_data_for_lstm_ctc("test"))[:2])
-    print train_inputs.shape
-    print train_codes
+    print(train_inputs.shape)
+    print(train_codes)
     print("train_codes", train_codes)
     targets = np.asarray(train_codes).flat[:]
-    print targets
-    print list(read_data_for_lstm_ctc("test", 0, 10))
+    print(targets)
+    print(list(read_data_for_lstm_ctc("test", 0, 10)))
diff --git a/detect.py b/detect.py
@@ -53,6 +53,6 @@ def detect(test_inputs, test_targets, test_seq_len):
 
 if __name__ == '__main__':
     test_inputs, test_targets, test_seq_len = utils.get_data_set('small_test')
-    print test_inputs[0].shape
-    print detect(test_inputs, test_targets, test_seq_len)
+    print(test_inputs[0].shape)
+    print(detect(test_inputs, test_targets, test_seq_len))
    # print_tensors_in_checkpoint_file("model/ocr.model.50", None)
diff --git a/extractbgs.py b/extractbgs.py
@@ -85,7 +85,6 @@ def members():
         if im.shape[0] > 256:
             im = cv2.resize(im, (256, 256))
         fname = "bgs/{:08}.jpg".format(index)
-        print fname
         rc = cv2.imwrite(fname, im)
         if not rc:
             raise Exception("Failed to write file {}".format(fname))
diff --git a/gen.py b/gen.py
@@ -45,13 +45,15 @@
 
 import common
 from common import OUTPUT_SHAPE
-fonts = ["fonts/Farrington-7B-Qiqi.ttf", "fonts/Arial.ttf", "fonts/times.ttf"]
-# fonts = ["fonts/times.ttf"]
+
+# fonts = ["fonts/Farrington-7B-Qiqi.ttf", "fonts/Arial.ttf", "fonts/times.ttf"]
+fonts = ["fonts/times.ttf"]
 FONT_HEIGHT = 32  # Pixel size to which the chars are resized
 
-CHARS = common.CHARS + " "
 
 
+CHARS=common.CHARS[:]
+CHARS.append(" ")
 def make_char_ims(output_height, font):
     font_size = output_height * 4
     font = ImageFont.truetype(font, font_size)
@@ -166,7 +168,7 @@ def generate_code():
     for i in range(length):
         if 0 == i % 4 and append_blank:
             f = f + blank
-        f = f + random.choice(common.DIGITS)
+        f = f + random.choice(common.CHARS)
     return f
 
 
@@ -283,5 +285,5 @@ def generate_ims(num_images):
         im_gen = generate_ims(size.get(dir_name))
         for img_idx, (im, c, p) in enumerate(im_gen):
             fname = dir_name + "/{:08d}_{}_{}.png".format(img_idx, c, "1" if p else "0")
-            print '\''+fname+'\','
+            print('\'' + fname + '\',')
             cv2.imwrite(fname, im * 255.)
diff --git a/gen_no_plate_shape_version.py b/gen_no_plate_shape_version.py
@@ -50,8 +50,8 @@
 # fonts = ["fonts/times.ttf"]
 FONT_HEIGHT = 32  # Pixel size to which the chars are resized
 
-CHARS = common.CHARS + " "
-
+CHARS=common.CHARS[:]
+CHARS.append(" ")
 
 def make_char_ims(output_height, font):
     font_size = output_height * 4
@@ -292,5 +292,5 @@ def generate_ims(num_images):
         im_gen = generate_ims(size.get(dir_name))
         for img_idx, (im, c, p) in enumerate(im_gen):
             fname = dir_name + "/{:08d}_{}_{}.png".format(img_idx, c, "1" if p else "0")
-            print '\'' + fname + '\','
+            print('\'' + fname + '\',')
             cv2.imwrite(fname, im * 255.)
diff --git a/lstm_and_ctc_ocr_train.py b/lstm_and_ctc_ocr_train.py
@@ -16,7 +16,8 @@
 
 # Some configs
 # Accounting the 0th indice +  space + blank label = 28 characters
-num_classes = ord('9') - ord('0') + 1 + 1 + 1
+# num_classes = ord('9') - ord('0') + 1 + 1 + 1
+num_classes = common.num_classes
 print("num_classes", num_classes)
 # Hyper-parameters
 num_epochs = 10000
@@ -59,7 +60,7 @@ def train():
                                                staircase=True)
     logits, inputs, targets, seq_len, W, b = model.get_train_model()
 
-    loss = tf.nn.ctc_loss( targets, logits, seq_len)
+    loss = tf.nn.ctc_loss(targets, logits, seq_len)
     cost = tf.reduce_mean(loss)
 
     optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
@@ -89,25 +90,27 @@ def do_batch():
         if steps > 0 and steps % common.REPORT_STEPS == 0:
             do_report()
             save_path = saver.save(session, "models/ocr.model", global_step=steps)
-            # print(save_path)
+            #print(save_path)
         return b_cost, steps
 
-    with tf.Session() as session:
+    config = tf.ConfigProto()
+    config.gpu_options.allow_growth = True
+    with tf.Session(config=config) as session:
         session.run(init)
         saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
-        for curr_epoch in xrange(num_epochs):
+        for curr_epoch in range(num_epochs):
             # variables = tf.all_variables()
             # for i in variables:
             #     print(i.name)
 
             print("Epoch.......", curr_epoch)
             train_cost = train_ler = 0
-            for batch in xrange(common.BATCHES):
+            for batch in range(common.BATCHES):
                 start = time.time()
                 train_inputs, train_targets, train_seq_len = utils.get_data_set('train', batch * common.BATCH_SIZE,
                                                                                 (batch + 1) * common.BATCH_SIZE)
 
-                print("get data time", time.time() - start)
+                #print("get data time", time.time() - start)
                 start = time.time()
                 c, steps = do_batch()
                 train_cost += c * common.BATCH_SIZE
@@ -116,7 +119,6 @@ def do_batch():
 
             train_cost /= common.TRAIN_SIZE
             # train_ler /= common.TRAIN_SIZE
-
             val_feed = {inputs: train_inputs,
                         targets: train_targets,
                         seq_len: train_seq_len}
diff --git a/model.py b/model.py
@@ -36,7 +36,6 @@ def avg_pool(x, ksize=(2, 2), stride=(2, 2)):
 def convolutional_layers():
     """
     Get the convolutional layers of the model.
-
     """
 
     inputs = tf.placeholder(tf.float32, [None, None, common.OUTPUT_SHAPE[0]])
@@ -71,15 +70,20 @@ def convolutional_layers():
     features = tf.nn.relu(tf.matmul(conv_layer_flat, W_fc1) + b_fc1)
     shape = tf.shape(features)
     features = tf.reshape(features, [shape[0], common.OUTPUT_SHAPE[1], 1])  # batchsize * outputshape * 1
-    return features
+    return inputs, features
+
+
+def lstm_cell():
+    return tf.contrib.rnn.LSTMCell(common.num_hidden)  # constructs a new LSTMCell with common.num_hidden units on every call
 
 
 def get_train_model():
     # Has size [batch_size, max_stepsize, num_features], but the
     # batch_size and max_stepsize can vary along each step
-    #features = convolutional_layers()
-    #print features.get_shape()
-    inputs = tf.placeholder(tf.float32, [None, None, common.OUTPUT_SHAPE[0]])
+    inputs, features = convolutional_layers()
+    # print features.get_shape()
+
+    # inputs = tf.placeholder(tf.float32, [None, None, common.OUTPUT_SHAPE[0]])
 
     # Here we use sparse_placeholder that will generate a
     # SparseTensor required by ctc_loss op.
@@ -92,16 +96,16 @@ def get_train_model():
     # Can be:
     #   tf.nn.rnn_cell.RNNCell
     #   tf.nn.rnn_cell.GRUCell
-    cell = tf.contrib.rnn.core_rnn_cell.LSTMCell(common.num_hidden, state_is_tuple=True)
+    # cell = tf.contrib.rnn.LSTMCell(common.num_hidden, state_is_tuple=True)
 
     # Stacking rnn cells
-    stack = tf.contrib.rnn.core_rnn_cell.MultiRNNCell([cell] * common.num_layers,
+    stack = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(0, common.num_layers)],
                                         state_is_tuple=True)
 
     # The second output is the last state and we will no use that
-    outputs, _ = tf.nn.dynamic_rnn(cell, inputs, seq_len, dtype=tf.float32)
+    outputs, _ = tf.nn.dynamic_rnn(stack, features, seq_len, dtype=tf.float32)
 
-    shape = tf.shape(inputs)
+    shape = tf.shape(features)
     batch_s, max_timesteps = shape[0], shape[1]
 
     # Reshaping to apply the same weights over the timesteps
diff --git a/models/README b/models/README
diff --git a/test.py b/test.py
@@ -7,9 +7,10 @@
 import utils
 
 __author__ = "andy"
-for batch in xrange(common.BATCHES):
-    train_inputs, train_targets, train_seq_len = utils.get_data_set('train', batch*common.BATCH_SIZE, (batch + 1) * common.BATCH_SIZE)
-    print batch, train_inputs.shape
-   # pickle_file = 'test/test.pickle' + str(batch)
-   # f = open(pickle_file, 'wb')
-   # pickle.dump(batch_data, f, pickle.HIGHEST_PROTOCOL)
+a = ['a','b','c','d']
+print (a.index('d'))
+
+#for batch in xrange(common.BATCHES):
+#    train_inputs, train_targets, train_seq_len = utils.get_data_set('train', batch*common.BATCH_SIZE, (batch + 1) * common.BATCH_SIZE)
+#    print batch, train_inputs.shape
+
diff --git a/utils.py b/utils.py