It's great that a series about a machine learning framework is coming out. I'd just note that Torch specifically is a rather low-level tool for working with neural networks, used mostly when developing more complex / new architectures. For ordinary (even advanced) users of neural networks I'd recommend Keras (with a TensorFlow backend) as a much better fit.
So if I'm, say, recognizing whether an image shows a dog or a cat, then:
1) there will be as many inputs as there are pixels
2) there will be two outputs: the probability of a dog and the probability of a cat (or neither, or possibly both)
3) there will be more layers in the middle of the network
Do I understand that correctly? The number of inputs looks scary; even now, training on those 2000 samples took me almost a minute (on an i5).
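In code I imagine something like this (a minimal sketch; the 32x32 grayscale input and the hidden-layer width of 64 are just my illustrative assumptions, not from the article):
require 'nn'
local inputs = 32 * 32              -- one input per pixel (32x32 grayscale)
local net = nn.Sequential()
net:add(nn.Linear(inputs, 64))      -- a hidden layer "in the middle"
net:add(nn.ReLU())
net:add(nn.Linear(64, 2))           -- two outputs: dog, cat
net:add(nn.Sigmoid())               -- each output is an independent probability
print(net:forward(torch.rand(inputs)))
With nn.Sigmoid the two outputs are independent probabilities, so "neither" or "both" is possible; nn.SoftMax would instead force them to sum to 1.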
Hi!
I happened to successfully get this tutorial working yesterday, "the sixty-minute deep learning blitz", which tackles exactly this: classifying images into several classes:
https://github.com/soumith/cvpr2015/blob/master/Deep%20Learning%20with%20Torch.ipynb
The only thing that didn't work for me was displaying images via itorch.image(); it can be replaced with image.display(), which also takes the image tensor. I had to install the qtlua and qttorch packages via luarocks, and I ran the script with "qlua skript.lua" instead of "th skript.lua".
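Concretely, the commands were (assuming luarocks is already installed):
luarocks install qtlua
luarocks install qttorch
qlua skript.lua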
BTW, it can push the work onto the GPU. When there is a lot of computing to do, for example precisely because the network has grown large, the GPU fares much better than the CPU (see the :cuda() calls near the end of the script below).
I'm also attaching a paste-up of code based on their tutorial (the network size is different, I enlarged it a bit to put CUDA through its paces):
require 'paths'
require 'image'
require 'nn'
require 'qt'
if (not paths.filep("cifar10torchsmall.zip")) then
os.execute('wget -c https://s3.amazonaws.com/torch7/data/cifar10torchsmall.zip')
os.execute('unzip cifar10torchsmall.zip')
end
trainset = torch.load('cifar10-train.t7')
testset = torch.load('cifar10-test.t7')
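-- trainset/testset each contain .data (an Nx3x32x32 ByteTensor of RGB images)
-- and .label (class indices 1..10, matching the classes table below)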
classes = {'airplane', 'automobile', 'bird', 'cat',
'deer', 'dog', 'frog', 'horse', 'ship', 'truck'}
print(trainset)
print(#trainset.data)
--itorch.image(trainset.data[100]) -- the tutorial's way to display the 100th image (needs itorch)
image.display(trainset.data[100])  -- works under qlua instead of itorch.image()
print(trainset.data[100])
print(classes[trainset.label[100]])
-- ignore setmetatable for now, it is a feature beyond the scope of this tutorial. It sets the index operator.
setmetatable(trainset,
{__index = function(t, i)
return {t.data[i], t.label[i]}
end}
);
trainset.data = trainset.data:double() -- convert the data from a ByteTensor to a DoubleTensor.
function trainset:size()
return self.data:size(1)
end
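-- nn.StochasticGradient expects exactly this interface from a dataset:
-- dataset:size() and dataset[i] returning {input, target}, hence the
-- setmetatable and the size() function above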
print(trainset:size()) -- just to test
print(trainset[33]) -- load sample number 33.
--itorch.image(trainset[33][1])
redChannel = trainset.data[{ {}, {1}, {}, {} }] -- this picks {all images, 1st channel, all vertical pixels, all horizontal pixels}
print(#redChannel)
mean = {} -- store the mean, to normalize the test set in the future
stdv = {} -- store the standard-deviation for the future
for i=1,3 do -- over each image channel
mean[i] = trainset.data[{ {}, {i}, {}, {} }]:mean() -- mean estimation
print('Channel ' .. i .. ', Mean: ' .. mean[i])
trainset.data[{ {}, {i}, {}, {} }]:add(-mean[i]) -- mean subtraction
stdv[i] = trainset.data[{ {}, {i}, {}, {} }]:std() -- std estimation
print('Channel ' .. i .. ', Standard Deviation: ' .. stdv[i])
trainset.data[{ {}, {i}, {}, {} }]:div(stdv[i]) -- std scaling
end
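-- after this loop every channel of the training data has zero mean and unit variance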
net = nn.Sequential()
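-- shape trace for a 3x32x32 input: conv(5x5) -> 12x28x28, pool -> 12x14x14,
-- conv(5x5) -> 16x10x10, pool -> 16x5x5, which is where the 16*5*5 below comes from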
net:add(nn.SpatialConvolution(3, 12, 5, 5)) -- 3 input image channels, 12 output channels, 5x5 convolution kernel
net:add(nn.ReLU()) -- non-linearity
net:add(nn.SpatialMaxPooling(2,2,2,2)) -- A max-pooling operation that looks at 2x2 windows and finds the max.
net:add(nn.SpatialConvolution(12, 16, 5, 5))
net:add(nn.ReLU()) -- non-linearity
net:add(nn.SpatialMaxPooling(2,2,2,2))
net:add(nn.View(16*5*5)) -- reshapes from a 3D tensor of 16x5x5 into 1D tensor of 16*5*5
net:add(nn.Linear(16*5*5, 120)) -- fully connected layer (matrix multiplication between input and weights)
net:add(nn.ReLU()) -- non-linearity
net:add(nn.Linear(120, 160))
net:add(nn.ReLU()) -- non-linearity
net:add(nn.Linear(160, 10)) -- 10 is the number of outputs of the network (here, the 10 CIFAR-10 classes)
net:add(nn.LogSoftMax()) -- converts the output to a log-probability. Useful for classification problems
criterion = nn.ClassNLLCriterion()
trainer = nn.StochasticGradient(net, criterion)
trainer.learningRate = 0.001
trainer.maxIteration = 5 -- just do 5 epochs of training.
trainer:train(trainset)
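-- StochasticGradient prints the current average error after each epoch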
print(classes[testset.label[100]])
--itorch.image(testset.data[100])
testset.data = testset.data:double() -- convert from Byte tensor to Double tensor
for i=1,3 do -- over each image channel
testset.data[{ {}, {i}, {}, {} }]:add(-mean[i]) -- mean subtraction
testset.data[{ {}, {i}, {}, {} }]:div(stdv[i]) -- std scaling
end
-- for fun, print the mean and standard-deviation of example-100
horse = testset.data[100]
print(horse:mean(), horse:std())
-- what the network thinks
print(classes[testset.label[100]])
image.display(testset.data[100])
predicted = net:forward(testset.data[100])
-- the output of the network is Log-Probabilities. To convert them to probabilities, you have to take e^x
print(predicted:exp())
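-- note: :exp() modifies the tensor in place, so predicted now holds probabilities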
-- To make it clearer, let us tag each probability with its class-name
for i=1,predicted:size(1) do
print(classes[i], predicted[i])
end
-- Alright, fine. One single example sucked, but how many in total seem to be correct over the test set?
correct = 0
for i=1,10000 do
local groundtruth = testset.label[i]
local prediction = net:forward(testset.data[i])
local confidences, indices = torch.sort(prediction, true) -- true means sort in descending order
if groundtruth == indices[1] then
correct = correct + 1
end
end
print(correct, 100*correct/10000 .. ' % ')
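-- random guessing over 10 balanced classes would score about 10 %, so anything
-- well above that means the network actually learned something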
-- Hmmm, what are the classes that performed well, and the classes that did not perform well
class_performance = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
for i=1,10000 do
local groundtruth = testset.label[i]
local prediction = net:forward(testset.data[i])
local confidences, indices = torch.sort(prediction, true) -- true means sort in descending order
if groundtruth == indices[1] then
class_performance[groundtruth] = class_performance[groundtruth] + 1
end
end
for i=1,#classes do
print(classes[i], 100*class_performance[i]/1000 .. ' %')
end
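-- GPU variant: the same training again, this time on CUDA tensors
-- (needs the cutorch/cunn rocks and an NVIDIA GPU)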
require 'cunn';
net = net:cuda()
criterion = criterion:cuda()
trainset.data = trainset.data:cuda()
trainset.label = trainset.label:cuda()
trainer = nn.StochasticGradient(net, criterion)
trainer.learningRate = 0.001
trainer.maxIteration = 5 -- just do 5 epochs of training.
trainer:train(trainset)
torch.save('obrazkova_nn', net)
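-- the saved net can be loaded back later with: net = torch.load('obrazkova_nn')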