Image Classification with PyTorch (Code Edition)


PyTorch can do image classification by loading pretrained models through torchvision.
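The whole pipeline below builds on one pattern: load a pretrained backbone from torchvision and swap its final layer for one sized to our own number of classes. A minimal sketch (the ResNet-50 choice and the 15-class head are just placeholders for illustration):

import torch
import torch.nn as nn
import torchvision.models as models

# load a pretrained ResNet-50 and replace its classification head
model = models.resnet50(pretrained=True)
model.fc = nn.Linear(in_features=2048, out_features=15, bias=True)  # 15 is a placeholder class count

# dummy forward pass to check the output shape
x = torch.randn(1, 3, 224, 224)
print(model(x).shape)  # torch.Size([1, 15])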

Data loading

DataSet.py

import csv
import os

import torch.utils.data as data
from PIL import Image, ImageOps
from torchvision import transforms


def read_data_paths(root_path, num_classes):
    labels = []    # label names
    img_info = []  # (image path, label index) pairs
    weight = [0 for _ in range(num_classes)]  # per-class counts, usable later for class-weighted loss on imbalanced data
    # read the csv
    with open(os.path.join(root_path, 'Data_Entry_2017_v2020.csv')) as f:
        f_csv = csv.reader(f)
        headers = next(f_csv)  # skip the header row
        for row in f_csv:      # read line by line
            label_name = row[1].split('|')[0].strip(' ')  # disease name; if several, take the first as the label
            if label_name not in labels:  # add the label if it has not been seen yet
                labels.append(label_name)
            img_info.append((os.path.join(root_path, 'images', row[0]), labels.index(label_name)))
            weight[labels.index(label_name)] += 1
    return labels, img_info, weight


class FixedScaleResize:  # resize while keeping aspect ratio; pad the short side with black so shapes are not distorted
    def __init__(self, crop_size):
        self.size = (crop_size, crop_size)  # square target size
        self.crop_size = crop_size

    def __call__(self, img):
        w, h = img.size
        if w == h:
            return img.resize(self.size, Image.BILINEAR)
        if w > h:
            sw = self.crop_size
            sh = int(1.0 * h * sw / w)
        else:
            sh = self.crop_size
            sw = int(1.0 * w * sh / h)
        img = img.resize((sw, sh), Image.BILINEAR)
        expand_w = int((self.crop_size - sw) / 2)
        expand_h = int((self.crop_size - sh) / 2)
        img = ImageOps.expand(img, border=(
            expand_w, expand_h, self.crop_size - expand_w - sw, self.crop_size - expand_h - sh), fill=0)
        return img


class ImageFolder(data.Dataset):  # image dataset
    def __init__(self, args):
        self.root_path = args.data_path
        self.img_size = args.img_size
        self.labels, self.img_info, self.weight = read_data_paths(self.root_path, args.num_classes)
        self.transform = transforms.Compose([
            FixedScaleResize(self.img_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def __getitem__(self, item):
        img_path, label = self.img_info[item]
        img = Image.open(img_path).convert('RGB')
        img = self.transform(img)
        label = int(label)
        return img, label

    def __len__(self):
        return len(self.img_info)
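A quick way to sanity-check the dataset class is to build it from a small Namespace and pull one batch; the ./data path below is a placeholder for wherever the csv file and images/ folder actually live:

# quick sanity check for DataSet.py (./data is a placeholder path)
from argparse import Namespace
from torch.utils.data import DataLoader
from DataSet import ImageFolder

args = Namespace(data_path='./data', img_size=128, num_classes=15)
dataset = ImageFolder(args)
loader = DataLoader(dataset, batch_size=4, shuffle=True)
imgs, labels = next(iter(loader))
print(imgs.shape, labels)  # e.g. torch.Size([4, 3, 128, 128]) and a tensor of label indices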

Training

train.py

import argparse
import os

import torch
import torch.nn as nn
import torchvision.models as models
from tqdm import tqdm

from DataSet import ImageFolder

parser = argparse.ArgumentParser()
parser.add_argument('--data_path', type=str, default='/')
parser.add_argument('--img_size', type=int, default=128)

parser.add_argument('--num_classes', type=int, default=15)
parser.add_argument('--batch_size', type=int, default=1)
parser.add_argument('--epochs', type=int, default=100)
parser.add_argument('--base_lr', type=float, default=0.01)
parser.add_argument('--n_gpu', type=int, default=-1)

parser.add_argument('--model_name', type=str, help='densenet or resnet', default='resnet')
parser.add_argument('--num_print', type=int, default=1)

device = 'cuda' if torch.cuda.is_available() else 'cpu'


def load_data(args):
    data_set = ImageFolder(args)
    train_size = int(len(data_set) * 0.9)
    test_size = len(data_set) - train_size
    print('train length: %d, test length: %d' % (train_size, test_size))
    train_set, test_set = torch.utils.data.random_split(data_set, [train_size, test_size])
    train_iter = torch.utils.data.DataLoader(dataset=train_set, batch_size=args.batch_size, shuffle=True, num_workers=4)
    test_iter = torch.utils.data.DataLoader(dataset=test_set, batch_size=args.batch_size, shuffle=True, num_workers=4)
    return train_iter, test_iter, data_set.labels, data_set.weight


def get_cur_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']


def trainer(model, train_iter, test_iter, criterion, optimizer, num_print):
    model.train()
    total, correct = 0, 0
    train_loss = 0
    # training
    for i, (inputs, labels) in enumerate(train_iter):
        inputs, labels = inputs.to(device), labels.to(device)
        output = model(inputs)
        loss = criterion(output, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        total += labels.size(0)
        correct += torch.eq(output.argmax(dim=1), labels).sum().item()
        train_acc = 100 * correct / total
        if (i + 1) % num_print == 0:
            print("step: [{}/{}], train_loss: {:.3f} | train_acc: {:6.3f}% | lr: {:.6f}"
                  .format(i + 1, len(train_iter), train_loss / (i + 1), train_acc, get_cur_lr(optimizer)))

    # validation
    model.eval()
    total, correct = 0, 0
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(test_iter):
            inputs, labels = inputs.to(device), labels.to(device)
            output = model(inputs)
            total += labels.size(0)
            correct += torch.eq(output.argmax(dim=1), labels).sum().item()
            test_acc = 100 * correct / total
            if (i + 1) % max(1, num_print // 5) == 0:
                print("step: [{}/{}], | test_acc: {:6.3f}% ".format(i + 1, len(test_iter), test_acc))

    return train_acc


def train(args):
    # directory to save models
    save_name = '%s_ep%d_lr%f_bs%d_imgsize%d' % (args.model_name, args.epochs, args.base_lr, args.batch_size, args.img_size)
    save_path = os.path.join('./save_model', save_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # model
    if args.model_name == 'densenet':
        model = models.densenet121(pretrained=True)
        model.classifier = nn.Linear(in_features=1024, out_features=args.num_classes, bias=True)  # replace the pretrained model's last layer with our own class count
    else:
        model = models.resnet50(pretrained=True)
        model.fc = nn.Linear(in_features=2048, out_features=args.num_classes, bias=True)
    model = model.to(device)

    if args.n_gpu > 1:
        model = nn.DataParallel(model)
        args.batch_size = args.batch_size * args.n_gpu

    # optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=args.base_lr, momentum=0.9, weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=5, factor=0.8, min_lr=1e-11)  # adaptive lr: if the monitored accuracy has not improved for 5 epochs, multiply the lr by 0.8

    # load the data
    train_iter, test_iter, labels, sample_weight = load_data(args)
    # save the label-name to index mapping
    fw = open(os.path.join(save_path, 'label.txt'), 'w')
    for label in labels:
        fw.write(str(label) + '\n')
    fw.close()

    criterion = nn.CrossEntropyLoss()  # cross-entropy loss

    train_acc = list()
    max_acc = 0
    for epoch in tqdm(range(args.epochs)):
        print('=================epoch:[{}/{}]======================'.format(epoch + 1, args.epochs))
        record_train = trainer(model, train_iter, test_iter, criterion, optimizer, args.num_print)
        train_acc.append(record_train)
        scheduler.step(record_train)  # step the scheduler on the training accuracy

        # save the model whenever the accuracy improves
        if record_train > max_acc:
            max_acc = record_train
            torch.save(model, '%s/%s_%s.pth' % (save_path, epoch, str(record_train)))
            torch.save(model.state_dict(), '%s/%s_%s_params.pth' % (save_path, epoch, str(record_train)))

    torch.save(model, '%s/final.pth' % save_path)
    torch.save(model.state_dict(), '%s/final_params.pth' % save_path)


if __name__ == '__main__':
    args = parser.parse_args()
    train(args)
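read_data_paths counts samples per class precisely so the counts can later drive class-weighted loss on imbalanced data, but train.py never uses the sample_weight it gets back from load_data. One possible way to plug it in, sketched here as an assumption rather than part of the original script, is an inverse-frequency weighted CrossEntropyLoss:

# sketch: inverse-frequency class weights for CrossEntropyLoss (not in the original train.py)
import torch
import torch.nn as nn

def make_weighted_criterion(sample_weight, device):
    # sample_weight: per-class counts returned by load_data / read_data_paths
    counts = torch.tensor(sample_weight, dtype=torch.float).clamp(min=1)
    class_weights = counts.sum() / (len(counts) * counts)  # rarer classes get larger weights
    return nn.CrossEntropyLoss(weight=class_weights.to(device))

Inside train(), criterion = nn.CrossEntropyLoss() would then become criterion = make_weighted_criterion(sample_weight, device).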

Testing

test.py

import argparse

import torch
from PIL import Image
from torchvision import transforms

from DataSet import ImageFolder, FixedScaleResize

parser = argparse.ArgumentParser()
parser.add_argument('--data_path', type=str, default='./')
parser.add_argument('--img_size', type=int, default=128)
parser.add_argument('--model_path', type=str, default='')
parser.add_argument('--num_classes', type=int, default=15)
device = 'cuda' if torch.cuda.is_available() else 'cpu'


def load_data(args):
    test_set = ImageFolder(args)
    test_iter = torch.utils.data.DataLoader(dataset=test_set, batch_size=1, shuffle=False, num_workers=4)
    return test_iter, test_set.labels


def test(args):
    model = torch.load(args.model_path).to(device)  # load the saved model
    model.eval()
    correct = 0
    test_iter, label_names = load_data(args)
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(test_iter):
            inputs, labels = inputs.to(device), labels.to(device)
            output = model(inputs)
            idx = output.argmax(dim=1)
            correct += torch.eq(idx, labels).sum().item()
            print(labels, idx)
    print('acc : ' + str(100 * correct / len(test_iter)))


def test_sample(args, image_path):
    transform = transforms.Compose([
        FixedScaleResize(args.img_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    model = torch.load(args.model_path).to(device)  # load the saved model
    model.eval()
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0).to(device)  # add the batch dimension
    with torch.no_grad():
        output = model(image)
    print(output.argmax(dim=1))


if __name__ == '__main__':
    args = parser.parse_args()
    test(args)
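test.py loads the whole pickled model with torch.load, which only works when the original class definitions are importable. Since train.py also saves final_params.pth, loading from a state_dict is an alternative; a rough sketch, assuming a ResNet-50 head and a params-file path you supply yourself:

# sketch: rebuild the network and load a saved state_dict (e.g. final_params.pth) instead of the pickled model
import torch
import torch.nn as nn
import torchvision.models as models

def load_from_state_dict(params_path, num_classes, device):
    model = models.resnet50(pretrained=False)
    model.fc = nn.Linear(2048, num_classes)  # head must match the one used during training
    # note: a state_dict saved from an nn.DataParallel model carries a 'module.' prefix on its keys
    model.load_state_dict(torch.load(params_path, map_location=device))
    return model.to(device).eval()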

Shell script

train.sh

#!/bin/bash

DATA_DIR='.'
MODEL_NAME='resnet'
CUDA_VISIBLE_DEVICES=$1 nohup python -u train.py --data_path $DATA_DIR --img_size 224 --num_classes 4 --batch_size 16 --epochs 500 --n_gpu $2 --model_name $MODEL_NAME >> $MODEL_NAME.out

Run command

sh train.sh 0,1 2 &

Here 0,1 selects which two GPUs are visible, and 2 tells the script that two GPUs are used.
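With CUDA_VISIBLE_DEVICES=0,1 set, PyTorch only sees those two cards and re-indexes them as cuda:0 and cuda:1, which is what nn.DataParallel in train.py relies on. A small check, purely for illustration:

# small check: how many GPUs PyTorch actually sees after CUDA_VISIBLE_DEVICES is applied
import torch

print(torch.cuda.device_count())  # expected to print 2 when CUDA_VISIBLE_DEVICES=0,1
for i in range(torch.cuda.device_count()):
    print(i, torch.cuda.get_device_name(i))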

