Add pre-commit hook

nepeplwu · nepeplwu · commit 8f25506c7ca2 · 2019-08-26T21:51:47.000+08:00
diff --git a/.copyright.hook b/.copyright.hook
@@ -0,0 +1,121 @@
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+import datetime
+import io, re
+import sys, os
+import subprocess
+import platform
+
+# License text inserted at the top of source files that lack a copyright
+# notice. The literal starts with a line break, so after splitting it into
+# lines index 0 is empty and index 1 is the "Copyright (c) <year> ..." line.
+# The module reads index 1 to build COPYRIGHT_HEADER, and
+# generate_copyright() leaves out indices 0 and 1 as well as the last
+# element when it emits the notice.
+COPYRIGHT = '''
+  Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+# Module-level placeholder only: generate_copyright() binds its own local
+# comment marker on every call and never reads or updates this value.
+LANG_COMMENT_MARK = None
+
+# Always a line feed, on every platform. The COPYRIGHT literal is split on
+# this mark and a Python string literal only contains line-feed breaks, so
+# splitting on CR+LF would leave the template in one piece. The files are
+# also written back through io.open() in text mode, which translates line
+# feeds to the platform line ending on its own.
+NEW_LINE_MARK = '\n'
+
+# The "Copyright (c) <year> ..." line of the template with the template's
+# year swapped for the current one. It is computed on every platform,
+# because generate_copyright() concatenates it and would fail on None.
+# The year comes from datetime rather than from an external `date` command,
+# which is not available on every system.
+COPYRIGHT_HEADER = COPYRIGHT.split(NEW_LINE_MARK)[1]
+p = re.search(r'(\d{4})', COPYRIGHT_HEADER).group(0)
+COPYRIGHT_HEADER = COPYRIGHT_HEADER.replace(
+    p, '{}'.format(datetime.date.today().year))
+
+
+def generate_copyright(template, lang='C'):
+    """Build the commented copyright notice for one source file.
+
+    Args:
+        template: License text whose lines are separated by NEW_LINE_MARK.
+            Its first two lines and its last line are not emitted; the
+            module-level COPYRIGHT_HEADER is emitted as the first line.
+        lang: 'Python' selects '#' comments; any other value selects '//'.
+
+    Returns:
+        The notice as one string: every emitted line is prefixed with the
+        comment mark and ended by NEW_LINE_MARK, and a single line feed
+        closes the notice.
+    """
+    comment_mark = '#' if lang == 'Python' else "//"
+
+    # Keep everything except the first two elements and the last one.
+    body_lines = template.split(NEW_LINE_MARK)[2:-1]
+
+    pieces = [comment_mark + " " + COPYRIGHT_HEADER + NEW_LINE_MARK]
+    for text in body_lines:
+        # Empty lines get the bare comment mark without a trailing blank.
+        separator = " " if text else ""
+        pieces.append(comment_mark + separator + text + NEW_LINE_MARK)
+    pieces.append("\n")
+    return "".join(pieces)
+
+
+def lang_type(filename):
+    """Map a filename to the comment style used for its copyright notice.
+
+    Args:
+        filename: Path or name of the file; only its suffix is inspected.
+
+    Returns:
+        "Python" for ``.py`` files and "C" for every other supported
+        suffix.
+
+    Raises:
+        SystemExit: With status 0 when the suffix is not supported.
+    """
+    if filename.endswith(".py"):
+        return "Python"
+    # .cxx and .hxx appear in the copyright_checker `files` pattern of
+    # .pre-commit-config.yaml, so they have to be accepted here as well.
+    if filename.endswith((".h", ".c", ".hpp", ".hxx", ".cc", ".cpp", ".cxx",
+                          ".cu", ".cuh", ".go", ".proto")):
+        return "C"
+    print("Unsupported filetype %s" % filename)
+    # sys.exit instead of the exit() helper injected by the site module;
+    # the exit status 0 of the original code is kept.
+    sys.exit(0)
+
+
+# Matches a comment line that carries a "coding:" / "coding=" declaration
+# and captures the declared encoding name. main() tests the first two lines
+# of every file against it and skips the file when either one matches.
+PYTHON_ENCODE = re.compile("^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")
+
+
+def main(argv=None):
+    """Insert the copyright notice into every given file that lacks one.
+
+    A file is left untouched when its first line already contains
+    "COPYRIGHT (C)" (case-insensitive), when its first line is a shebang,
+    or when one of its first two lines declares a source encoding.
+
+    Args:
+        argv: Command-line arguments (the filenames to check). None makes
+            argparse read them from sys.argv.
+
+    Returns:
+        1 if at least one file was rewritten, otherwise 0.
+    """
+    parser = argparse.ArgumentParser(
+        description='Checker for copyright declaration.')
+    parser.add_argument('filenames', nargs='*', help='Filenames to check')
+    args = parser.parse_args(argv)
+
+    retv = 0
+    for filename in args.filenames:
+        # One handle per file, closed by the with-block even when the file
+        # is skipped. The rest of the file is only read when it is needed.
+        with io.open(filename, encoding="utf-8") as fd:
+            first_line = fd.readline()
+            second_line = fd.readline()
+            if "COPYRIGHT (C)" in first_line.upper():
+                continue
+            if (first_line.startswith("#!")
+                    or PYTHON_ENCODE.match(second_line) is not None
+                    or PYTHON_ENCODE.match(first_line) is not None):
+                continue
+            original_contents = first_line + second_line + fd.read()
+
+        new_contents = generate_copyright(
+            COPYRIGHT, lang_type(filename)) + original_contents
+        print('Auto Insert Copyright Header {}'.format(filename))
+        retv = 1
+        # Write with the encoding the file was read with; the platform
+        # default encoding may be unable to represent the contents.
+        with io.open(filename, 'w', encoding="utf-8") as output_file:
+            output_file.write(new_contents)
+
+    return retv
+
+
+if __name__ == '__main__':
+    # sys.exit rather than the exit() helper, which only exists when the
+    # site module is loaded. main()'s return value becomes the process
+    # exit status.
+    sys.exit(main())
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,41 @@
+-   repo: local
+    hooks:
+    -   id: yapf
+        name: yapf
+        entry: yapf
+        language: system
+        # The option and its value are joined with "=" so that yapf gets one
+        # well-formed argument. Written as "--style .style.yapf", the whole
+        # string including the space is handed over as a single argument.
+        args: [-i, --style=.style.yapf]
+        files: \.py$
+
+# Hooks from the pre-commit-hooks repository, pinned to one revision.
+# Each hook id is listed once.
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    sha: a11d9314b22d8f8c7556443875b731ef05965464
+    hooks:
+    -   id: check-merge-conflict
+    -   id: check-symlinks
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+    -   id: detect-private-key
+    -   id: check-added-large-files
+
+-   repo: local
+    hooks:
+    -   id: flake8
+        name: flake8
+        entry: flake8
+        language: system
+        # Only the checks named in --select are reported.
+        args: ["--count", "--select=E9,F63,F7,F82", "--show-source", "--statistics"]
+        files: \.py$
+
+-   repo: local
+    hooks:
+    -   id: copyright_checker
+        name: copyright_checker
+        entry: python ./.copyright.hook
+        language: system
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
+        # Skip files under third_party. `exclude` drops every path the
+        # pattern matches, so it has to match the third_party paths. The
+        # look-ahead form "(?!.*third_party)^.*$" matched every path
+        # *without* third_party and so kept the hook from running on any
+        # other file.
+        exclude: third_party
diff --git a/.style.yapf b/.style.yapf
@@ -0,0 +1,3 @@
+[style]
+based_on_style = pep8
+column_limit = 80
diff --git a/docs/configs/dataset_group.md b/docs/configs/dataset_group.md
@@ -62,7 +62,7 @@ DATASET Group存放所有与数据集相关的配置
 
 ## `VIS_FILE_LIST`
 
-可视化列表，调用`pdseg/train.py`进行训练时，如果打开了--use_tbx开关，则在每次模型保存的时候，会读取该列表中的图片进行可视化
+可视化列表，调用`pdseg/train.py`进行训练时，如果打开了--use_tb开关，则在每次模型保存的时候，会读取该列表中的图片进行可视化
 
 文件列表由多行组成，每一行的格式为
 ```
@@ -128,4 +128,4 @@ mydata/train/image4.jpg|mydata/train/image4.label.jpg
 
 ### 默认值
 
-255
+255
diff --git a/docs/usage.md b/docs/usage.md
@@ -27,10 +27,10 @@ python pdseg/export_model.py ${FLAGS} ${OPTIONS}
 |--cfg|ALL|配置文件路径|None||
 |--use_gpu|train/eval/vis|是否使用GPU进行训练|False||
 |--use_mpio|train/eval|是否使用多线程进行IO处理|False|打开该开关会占用一定量的CPU内存，但是可以提高训练速度。</br> NOTE：windows平台下不支持该功能, 建议使用自定义数据初次训练时不打开，打开会导致数据读取异常不可见。 </br> |
-|--use_tbx|train|是否使用tensorboardX记录训练数据|False||
+|--use_tb|train|是否使用TensorBoard记录训练数据|False||
 |--log_steps|train|训练日志的打印周期（单位为step）|10||
 |--debug|train|是否打印debug信息|False|IOU等指标涉及到混淆矩阵的计算，会降低训练速度|
-|--tbx_log_dir|train|tensorboardX的日志路径|None||
+|--tb_log_dir|train|TensorBoard的日志路径|None||
 |--do_eval|train|是否在保存模型时进行效果评估|False||
 |--vis_dir|vis|保存可视化图片的路径|"visual"||
 |--also_save_raw_results|vis|是否保存原始的预测图片|False||
@@ -76,17 +76,17 @@ unzip mini_pet.zip
 export CUDA_VISIBLE_DEVICES=0,1
 python pdseg/train.py --use_gpu \
                       --do_eval \
-                      --use_tbx \
-                      --tbx_log_dir train_log \
+                      --use_tb \
+                      --tb_log_dir train_log \
                       --cfg configs/unet_pet.yaml \
                       BATCH_SIZE 4 \
                       TRAIN.PRETRAINED_MODEL unet_coco_init \
                       DATASET.DATA_DIR mini_pet \
                       DATASET.TEST_FILE_LIST mini_pet/file_list/test_list.txt \
                       DATASET.TRAIN_FILE_LIST mini_pet/file_list/train_list.txt \
                       DATASET.VAL_FILE_LIST mini_pet/file_list/val_list.txt \
-                      DATASET.VIS_FILE_LIST mini_pet/file_list/val_list.txt
-                      TRAIN.SYNC_BATCH_NORM True
+                      DATASET.VIS_FILE_LIST mini_pet/file_list/val_list.txt \
+                      TRAIN.SYNC_BATCH_NORM True \
                       SOLVER.LR 5e-5
 ```
 
@@ -100,7 +100,7 @@ python pdseg/train.py --use_gpu \
 
 ### 训练过程可视化
 
-当打开do_eval和use_tbx两个开关后，我们可以通过TensorBoard查看训练的效果
+当打开do_eval和use_tb两个开关后，我们可以通过TensorBoard查看训练的效果
 ```shell
 tensorboard --logdir train_log --host {$HOST_IP} --port {$PORT}
 ```
@@ -147,4 +147,4 @@ python pdseg/export_model.py --cfg configs/unet_pet.yaml \
                                    TEST.TEST_MODEL test/saved_models/unet_pet/final
 ```
 
-模型会导出到freeze_model目录，接下来就是进行模型的部署，相关步骤，请查看[模型部署](./inference/README.md)
+模型会导出到freeze_model目录，接下来就是进行模型的部署，相关步骤，请查看[模型部署](./inference/README.md)

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+[style]`
	`2`	`+based_on_style = pep8`
	`3`	`+column_limit = 80`