from paddlehub.dataset.base_cv_dataset import BaseCVDataset
import os
import random
# Sub-directory (inside ``text_base``) holding one image folder per class.
folder_base = "pics"

# Dataset root directory; the generated list files are written into it.
text_base = "dataset"

# Class folder names. A name's position in this list is used as its numeric
# label when the list files are generated.
folder_name = [
    'yushuxin',
    'xujiaqi',
    'zhaoxiaotang',
    'anqi',
    'wangchengxuan',
]
def _write_label_file(list_path, image_dir, class_names, files_per_class):
    """Write one ``<image path> <label>`` line per image to *list_path*.

    ``files_per_class[i]`` holds the file names belonging to
    ``class_names[i]``; the label written for them is ``i``.
    """
    with open(list_path, 'w') as f:
        for label, (name, files) in enumerate(zip(class_names, files_per_class)):
            for file_name in files:
                f.write(os.path.join(image_dir, name, file_name)
                        + ' ' + str(label) + '\n')


def write_list(base_dir=None, image_dir=None, class_names=None, val_count=20):
    """Generate ``train_list.txt``, ``validate_list.txt`` and ``test_list.txt``.

    For every class folder the images are shuffled; the last ``val_count``
    images go to the validation list and the rest to the training list. Each
    line of a list file is ``<path relative to base_dir> <label>``, where the
    label is the index of the class in ``class_names``.

    Args:
        base_dir: Dataset root in which the list files are written.
            Defaults to the module-level ``text_base``.
        image_dir: Sub-directory of ``base_dir`` containing one folder per
            class. Defaults to the module-level ``folder_base``.
        class_names: Class folder names, in label order. Defaults to the
            module-level ``folder_name``.
        val_count: Number of images per class held out for validation.
            A class with no more than ``val_count`` images contributes
            nothing to the training list.

    Raises:
        ValueError: If ``val_count`` is negative.
    """
    if base_dir is None:
        base_dir = text_base
    if image_dir is None:
        image_dir = folder_base
    if class_names is None:
        class_names = folder_name
    if val_count < 0:
        raise ValueError("val_count must be non-negative, got %r" % (val_count,))

    train_files = []
    validate_files = []
    for name in class_names:
        class_dir = os.path.join(base_dir, image_dir, name)
        # Keep regular files only (sub-directories cannot be images) and sort
        # so the shuffle is reproducible once the random seed is fixed.
        pics = sorted(
            entry for entry in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, entry))
        )
        # Shuffle the images before splitting.
        random.shuffle(pics)
        # Hold out the last ``val_count`` images of each class for validation.
        split = max(len(pics) - val_count, 0)
        train_files.append(pics[:split])
        validate_files.append(pics[split:])

    # Write the train / validation list files.
    _write_label_file(os.path.join(base_dir, 'train_list.txt'),
                      image_dir, class_names, train_files)
    _write_label_file(os.path.join(base_dir, 'validate_list.txt'),
                      image_dir, class_names, validate_files)

    # The test paths are written relative to the dataset root here; the
    # original author notes that training fails otherwise. As in the original
    # file, the last line has no trailing newline.
    with open(os.path.join(base_dir, 'test_list.txt'), 'w') as f:
        f.write('\n'.join(
            'test/{}.jpg {}'.format(name, label)
            for label, name in enumerate(class_names)
        ))
class DemoDataset(BaseCVDataset):
    """Image dataset rooted at the ``dataset`` directory.

    Passes the names of the train / validate / test / label list files to
    ``BaseCVDataset`` together with the dataset root.
    """

    def __init__(self):
        # Location of the dataset on disk.
        self.dataset_dir = "dataset"
        # Names of the list files handed to the base class.
        list_files = {
            "train_list_file": "train_list.txt",
            "validate_list_file": "validate_list.txt",
            "test_list_file": "test_list.txt",
            "label_list_file": "label_list.txt",
        }
        super(DemoDataset, self).__init__(
            base_path=self.dataset_dir, **list_files)
# Generate the train / validate / test list files before building the dataset.
# NOTE(review): presumably BaseCVDataset reads these files on construction,
# so the order of these two statements matters — confirm against PaddleHub.
write_list()
# Module-level dataset instance; the prediction code later in this file calls
# dataset.label_dict() on it.
dataset = DemoDataset()
2020-04-26 09:03:14,531-WARNING: paddle.fluid.layers.py_reader() may be deprecated in the near future. Please use paddle.fluid.io.DataLoader.from_generator() instead.
[2020-04-26 09:03:14,850] [ INFO] - Strategy with scheduler: {'warmup': 0.0, 'linear_decay': {'start_point': 1.0, 'end_learning_rate': 0.0}, 'noam_decay': False, 'discriminative': {'blocks': 0, 'factor': 2.6}, 'gradual_unfreeze': 0, 'slanted_triangle': {'cut_fraction': 0.0, 'ratio': 32}}, regularization: {'L2': 0.001, 'L2SP': 0.0, 'weight_decay': 0.0} and clip: {'GlobalNorm': 0.0, 'Norm': 0.0}
[2020-04-26 09:03:35,033] [ INFO] - Try loading checkpoint from cv_finetune_turtorial_demo/ckpt.meta
[2020-04-26 09:03:35,035] [ INFO] - PaddleHub model checkpoint not found, start from scratch...
[2020-04-26 09:03:35,150] [ INFO] - PaddleHub finetune start
[2020-04-26 09:03:47,510] [ TRAIN] - step 10 / 42: loss=0.24024 acc=0.97500 [step/sec: 0.57]
[2020-04-26 09:03:47,511] [ INFO] - Evaluation on dev dataset start
2020-04-26 09:03:48,333-WARNING: paddle.fluid.layers.py_reader() may be deprecated in the near future. Please use paddle.fluid.io.DataLoader.from_generator() instead.
share_vars_from is set, scope is ignored.
[2020-04-26 09:03:52,060] [ EVAL] - [dev dataset evaluation result] loss=0.78752 acc=0.72500 [step/sec: 0.90]
[2020-04-26 09:03:52,061] [ EVAL] - best model saved to cv_finetune_turtorial_demo/best_model [best acc=0.72500]
2020-04-26 09:03:53,383-WARNING: paddle.fluid.layers.py_reader() may be deprecated in the near future. Please use paddle.fluid.io.DataLoader.from_generator() instead.
[2020-04-26 09:04:00,589] [ TRAIN] - step 20 / 42: loss=0.03348 acc=1.00000 [step/sec: 0.64]
[2020-04-26 09:04:00,590] [ INFO] - Evaluation on dev dataset start
[2020-04-26 09:04:03,909] [ EVAL] - [dev dataset evaluation result] loss=0.43007 acc=0.84167 [step/sec: 0.91]
[2020-04-26 09:04:03,910] [ EVAL] - best model saved to cv_finetune_turtorial_demo/best_model [best acc=0.84167]
[2020-04-26 09:04:12,277] [ TRAIN] - step 30 / 42: loss=0.00609 acc=1.00000 [step/sec: 0.70]
[2020-04-26 09:04:12,278] [ INFO] - Evaluation on dev dataset start
[2020-04-26 09:04:15,549] [ EVAL] - [dev dataset evaluation result] loss=0.27297 acc=0.88333 [step/sec: 0.92]
[2020-04-26 09:04:15,550] [ EVAL] - best model saved to cv_finetune_turtorial_demo/best_model [best acc=0.88333]
[2020-04-26 09:04:23,595] [ TRAIN] - step 40 / 42: loss=0.00310 acc=1.00000 [step/sec: 0.79]
[2020-04-26 09:04:23,596] [ INFO] - Evaluation on dev dataset start
[2020-04-26 09:04:26,920] [ EVAL] - [dev dataset evaluation result] loss=0.20050 acc=0.90000 [step/sec: 0.90]
[2020-04-26 09:04:26,922] [ EVAL] - best model saved to cv_finetune_turtorial_demo/best_model [best acc=0.90000]
[2020-04-26 09:04:29,548] [ INFO] - Evaluation on dev dataset start
[2020-04-26 09:04:32,778] [ EVAL] - [dev dataset evaluation result] loss=0.19163 acc=0.89167 [step/sec: 0.93]
[2020-04-26 09:04:32,779] [ INFO] - Load the best model from cv_finetune_turtorial_demo/best_model
[2020-04-26 09:04:33,288] [ INFO] - Evaluation on test dataset start
[2020-04-26 09:04:33,583] [ EVAL] - [test dataset evaluation result] loss=0.36848 acc=1.00000 [step/sec: 3.43]
[2020-04-26 09:04:33,585] [ INFO] - Saving model checkpoint to cv_finetune_turtorial_demo/step_45
[2020-04-26 09:04:34,922] [ INFO] - PaddleHub finetune finished.
虽然最终验证集上的准确率没有达到 100%,但这也说明模型没有完全过拟合训练数据,推断时的泛化表现反而可能更好一些。
Step6、预测
Finetune 完成后,我们使用模型来进行预测。先通过以下代码获取测试图片的路径:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# Rewrite the test list so its paths carry the "dataset/" prefix again; the
# entries read back below are passed to task.predict() as image paths.
with open(os.path.join(text_base, 'test_list.txt'), 'w') as f:
    f.write('dataset/test/yushuxin.jpg 0\n')
    f.write('dataset/test/xujiaqi.jpg 1\n')
    f.write('dataset/test/zhaoxiaotang.jpg 2\n')
    f.write('dataset/test/anqi.jpg 3\n')
    f.write('dataset/test/wangchengxuan.jpg 4')

# Read the list back; each line is "<image path> <label>".
with open(os.path.join(text_base, 'test_list.txt'), "r") as f:
    filepath = f.readlines()

# Keep only the image path of every line (instead of indexing five fixed rows).
data = [line.split(" ")[0] for line in filepath]

label_map = dataset.label_dict()
index = 0

# ``task`` is defined earlier in the file, outside this block.
run_states = task.predict(data=data)
results = [run_state.run_results for run_state in run_states]

for batch_result in results:
    print(batch_result)
    # In the output logged below this code, batch_result is a one-element
    # list wrapping an (images, classes) score array; argmax over the last
    # axis gives the predicted class index for each image.
    batch_result = np.argmax(batch_result, axis=2)[0]
    print(batch_result)
    for result in batch_result:
        # ``index`` is a 1-based running counter across all batches.
        index += 1
        result = label_map[result]
        print("input %i is %s, and the predict result is %s" %
              (index, data[index - 1], result))
[2020-04-26 09:04:34,934] [ INFO] - The best model has been loaded
[2020-04-26 09:04:34,935] [ INFO] - PaddleHub predict start
share_vars_from is set, scope is ignored.
[2020-04-26 09:04:35,332] [ INFO] - PaddleHub predict finished.
[array([[9.9538785e-01, 1.4281583e-03, 1.4344796e-03, 1.5812222e-03,
1.6829165e-04],
[1.5182982e-01, 4.8975992e-01, 3.1340811e-02, 1.5656298e-01,
1.7050646e-01],
[1.7682713e-01, 2.9195112e-01, 3.3537161e-01, 1.8833089e-01,
7.5192782e-03],
[2.2442464e-03, 1.8785913e-02, 1.1669159e-03, 9.7122192e-01,
6.5810294e-03],
[3.4868010e-04, 1.0628232e-03, 1.2720286e-04, 6.7610515e-04,
9.9778521e-01]], dtype=float32)]
[0 1 2 3 4]
input 1 is dataset/test/yushuxin.jpg, and the predict result is 虞书欣
input 2 is dataset/test/xujiaqi.jpg, and the predict result is 许佳琪
input 3 is dataset/test/zhaoxiaotang.jpg, and the predict result is 赵小棠
input 4 is dataset/test/anqi.jpg, and the predict result is 安崎
input 5 is dataset/test/wangchengxuan.jpg, and the predict result is 王承渲