「.NET 開発基盤部会 Wiki」は、「Open棟梁Project」,「OSSコンソーシアム .NET開発基盤部会」によって運営されています。
C言語、C++、Python、Java、Go
人間が用いる学習や論理的思考と似たように、
パターンや相関を検出し解釈する
ニューラルネットワークの構築と訓練に必要な機能を提供する。
Pythonで書かれている。
∴ フレームワークによる記述を非常に高級にしてくれるラッパーとして機能する。
機械学習・深層学習の特性上、インタラクティブな環境の方が習得が捗る。
→ チュートリアル
→ チュートリアル
!pip install --upgrade pip
!pip install -U tensorflow
!pip install -U keras
!pip install numpy !pip install pandas !pip install seaborn !pip install statsmodels !pip install scikit-learn !pip install opencv-python
import warnings warnings.filterwarnings('ignore')
import pandas as pd import numpy as np import matplotlib.pyplot as plt %matplotlib inline
import tensorflow as tf from tensorflow.keras.layers import BatchNormalization print(tf.__version__)
import keras print(keras.__version__) # モデル定義 from keras.models import Model, Sequential, model_from_json from keras.layers import Dense, Input, Activation, Flatten, Dropout, LSTM from keras.layers.convolutional import Conv2D from keras.layers.pooling import MaxPool2D from keras.callbacks import EarlyStopping, ModelCheckpoint from keras import optimizers from keras.optimizers import SGD, Adam # その他 from keras.applications.vgg16 import VGG16 from keras.utils import np_utils
from sklearn import metrics from sklearn.metrics import confusion_matrix as cm from sklearn.model_selection import train_test_split
import cv2 # OpenCV
apt-get update && apt-get upgrade -y apt-get install -y libgl1-mesa-dev apt-get install -y libopencv-dev
from statsmodels.tsa.seasonal import seasonal_decompose
from keras.datasets import mnist (x_train_org, y_train_org), (x_test_org, y_test_org) = mnist.load_data() print(x_train_org.shape, x_test_org.shape) print(y_train_org.shape, y_test_org.shape)
show_image_info(x_train_org, y_train_org, [0,1,2,3,4,5,6,7,8,9], 10)
x_train_std = x_train_org.astype('f') x_test_std = x_test_org.astype('f')
x_train_std /= 255 x_test_std /= 255
y_train = np_utils.to_categorical(y_train_org, num_classes=10).astype('i') y_test = np_utils.to_categorical(y_test_org, num_classes=10).astype('i')・デコーディング
print((y_train.argmax(axis=1) == y_train_org).all()) print((y_test.argmax(axis=1) == y_test_org).all())
# Build the MNIST classifier by handing the whole layer stack to Sequential.
model = Sequential([
    # Flatten: input conversion layer; the input size is given here.
    Flatten(input_shape=(28, 28)),
    # Dense: fully connected (linear) layer; the argument is the output size.
    Dense(900),
    # Activation: ReLU.
    Activation('relu'),
    # Deeper network: the same Dense + ReLU pair repeated.
    Dense(1000),
    Activation('relu'),
    Dense(500),
    Activation('relu'),
    # Output layer: 10 units followed by softmax.
    Dense(10),
    Activation('softmax'),
])

# Compile with the loss function, optimizer and evaluation metric:
#   loss      : categorical_crossentropy
#   optimizer : SGD with its default settings
#   metric    : accuracy
model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(),
    metrics=['accuracy'],
)
batch_size = 100 n_epoch = 20 # Keras Model の fit() は History オブジェクトを返す hist = model.fit(x_train_std, y_train, validation_data=(x_test_std, y_test), batch_size=batch_size, epochs=n_epoch, verbose=1 )※ batch_size、epochsについてはコチラ
# Predict a single test sample and compare the result with its label.
index = 10
# Display the *test* image: the prediction below runs on x_test_std[index],
# so showing x_train_org[index] would display an unrelated picture.
show_image_info(x_test_org, y_test_org, [0,1,2,3,4,5,6,7,8,9], index)
# The sample is reshaped to (1, 28, 28) before being passed to predict().
predict = model.predict(x_test_std[index].reshape(1, 28, 28)).argmax()
answer = y_test_org[index]
print('predict: ', predict)
print('answer : ', answer)
if predict == answer:
    print('正解')
else:
    print('不正解')
model.evaluate(x_test_std, y_test) #(損失値、精度)を返す
hist.history
def plot_history_loss(hist):
    """Plot how the loss value changed over the epochs.

    `hist` is the object returned by model.fit(); its `history` mapping
    is read for the 'loss' and 'val_loss' series.
    """
    series = hist.history
    for key, caption in (('loss', "loss for training"),
                         ('val_loss', "loss for validation")):
        plt.plot(series[key], label=caption)
    plt.title('model loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(loc='best')
    plt.show()


def plot_history_acc(hist):
    """Plot how the accuracy changed over the epochs.

    Reads the 'accuracy' and 'val_accuracy' series from `hist.history`
    and fixes the y axis to the range 0..1.
    """
    series = hist.history
    for key, caption in (('accuracy', "accuracy for training"),
                         ('val_accuracy', "accuracy for validation")):
        plt.plot(series[key], label=caption)
    plt.title('model accuracy')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend(loc='best')
    plt.ylim([0, 1])
    plt.show()

# - Graph display (heading of the next step on the original page)
plot_history_loss(hist) plot_history_acc(hist)
result = model.predict(x_test_std).argmax(axis=1) confmat = cm(y_test_org, result) # y_testはOne-Hot表現前 confmat
def plot_cm(confmat, size):
    """Draw a confusion matrix with the count written in every cell.

    confmat: 2-D array of counts (rows = true label, columns = predicted).
    size:    edge length of the square figure, passed to `figsize`.
    """
    fig, ax = plt.subplots(figsize=(size, size))
    ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3)
    n_rows, n_cols = confmat.shape
    for i in range(n_rows):
        for j in range(n_cols):
            ax.text(x=j, y=i, s=confmat[i, j], va='center', ha='center')
    # Ticks follow the matrix dimensions. Deriving them from `size` (the
    # figure size) was only correct when size happened to equal the class count.
    plt.xticks(np.arange(n_cols))
    plt.yticks(np.arange(n_rows))
    plt.xlabel('predicted label')
    plt.ylabel('true label')
    plt.show()
plot_cm(confmat, 10)
def print_metrics(data, label, result):
    """Print accuracy and macro-averaged recall, precision and F1.

    data:   not used by this function (kept for the existing call sites).
    label:  true class labels.
    result: predicted class labels.
    """
    # Accuracy takes no averaging argument, so it is printed on its own.
    print('accuracy: %.3f' % metrics.accuracy_score(label, result))
    macro_rows = (
        ('recall: %.3f', metrics.recall_score),        # recall (macro average)
        ('precision: %.3f', metrics.precision_score),  # precision (macro average)
        ('f1_score: %.3f', metrics.f1_score),          # F1 (macro average)
    )
    for template, scorer in macro_rows:
        print(template % scorer(label, result, average='macro'))
print_metrics(x_test_std, y_test_org, result) # y_testはOne-Hot表現前
# Walk through the test set and show every sample the model got wrong.
index = (y_test_org != result)  # True where prediction and label differ
for i, val in enumerate(index):
    if not val:
        continue
    print('predict: ', result[i])
    print('answer : ', y_test_org[i])
    show_image_info(x_test_org, y_test_org, [0,1,2,3,4,5,6,7,8,9], i)
import os
from urllib import request

# exist_ok=True keeps this cell re-runnable; os.mkdir raises
# FileExistsError the second time it is executed.
os.makedirs('./datasets', exist_ok=True)

# The three pickles differ only by name, so fetch them in one loop.
# ('https://.../' is the placeholder host used on the original page.)
for name in ('train', 'test', 'label'):
    url = 'https://.../' + name + '.pickle'
    request.urlretrieve(url, './datasets/' + name + '.pickle')
from keras.datasets import cifar10 (x_train,y_train),(x_test,y_test)=cifar10.load_data()https://qiita.com/God_KonaBanana/items/10fa8bb58cdd1dbd2e59
def unpickle(file):
    """Load and return the object stored in the pickle file at path `file`.

    The file is read in binary mode and decoded with encoding='bytes'.

    NOTE(review): pickle.load can execute arbitrary code; only use this on
    files from a source you trust (the page downloads them over HTTP).
    """
    import pickle

    with open(file, 'rb') as stream:
        loaded = pickle.load(stream, encoding='bytes')
    return loaded
train = unpickle('./datasets/train.pickle') test = unpickle('./datasets/test.pickle') label = unpickle('./datasets/label.pickle')
x_train_org = train['data'] y_train_org = train['label'] x_test_org = test['data'] y_test_org = test['label'] print(x_train_org.shape) print(y_train_org.shape) print(x_test_org.shape) print(y_test_org.shape)
# サンプル数, height, width, channelへ変更 x_train = x_train_org.transpose([0, 2, 3, 1]) x_test = x_test_org.transpose([0, 2, 3, 1])
x_train_std = x_train.astype('f') x_test_std = x_test.astype('f')
x_train_std /= 255 x_test_std /= 255
y_train = np_utils.to_categorical(y_train_org, num_classes=5).astype('i') y_test = np_utils.to_categorical(y_test_org, num_classes=5).astype('i')・デコーディング
print((y_train.argmax(axis=1) == y_train_org).all()) print((y_test.argmax(axis=1) == y_test_org).all())
show_image_info(x_train, y_train_org, label, 1300)
model = Sequential() # 畳み込み層とプーリング層1 model.add(Conv2D(input_shape=(32, 32, 3), filters=64, kernel_size=(4, 4), strides=(1, 1), padding='same')) # 畳み込み層 # バッチ正規化の追加位置 model.add(MaxPool2D(pool_size=(2, 2))) # プーリング増 model.add(Activation('relu')) # 畳み込み層とプーリング層2 model.add(Conv2D(filters=128, kernel_size=(4, 4), strides=(1, 1), padding='same')) # 畳み込み層 # バッチ正規化の追加位置 model.add(MaxPool2D(pool_size=(2, 2))) # プーリング増 model.add(Activation('relu')) # 畳み込み層とプーリング層3 model.add(Conv2D(filters=128, kernel_size=(4, 4), strides=(1, 1), padding='same')) # 畳み込み層 # バッチ正規化の追加位置 model.add(MaxPool2D(pool_size=(2, 2))) # プーリング増 model.add(Activation('relu')) model.add(Flatten()) # 全結合層1 model.add(Dense(512)) model.add(Activation('relu')) # Dropoutの追加位置 # 全結合層2 model.add(Dense(5)) model.add(Activation('softmax')) # コンパイル model.compile(loss='categorical_crossentropy', optimizer=SGD(0.01), # 学習率:0.01 metrics=['accuracy'])
batch_size = 500 n_epoch = 30 hist = model.fit(x_train_std , y_train, validation_data=(x_test_std, y_test), batch_size=batch_size, epochs=n_epoch, verbose=1)
# Predict a single test sample and compare the result with its label.
index = 10
# Display the *test* image: the prediction below runs on x_test_std[index],
# so showing x_train[index] would display an unrelated picture.
show_image_info(x_test, y_test_org, label, index)
# The sample is reshaped to (1, 32, 32, 3) before being passed to predict().
predict = model.predict(x_test_std[index].reshape(1, 32, 32, 3)).argmax()
answer = y_test_org[index]
print('predict: ', predict)
print('answer : ', answer)
if predict == answer:
    print('正解')
else:
    print('不正解')
# Show every test sample the current model misclassified.
# Recompute the predictions first: no earlier step in this section assigns
# `result`, so it would otherwise still hold the MNIST predictions.
result = model.predict(x_test_std).argmax(axis=1)
index = (y_test_org != result)  # True where prediction and label differ
for i, val in enumerate(index):
    if val:
        print('predict: ', result[i])
        print('answer : ', y_test_org[i])
        show_image_info(x_test, y_test_org, label, i)
def show_image_info(x, y, label, index):
    """Print the class name of sample `index` and display its image.

    x:     indexable collection of images.
    y:     class ids, one per image.
    label: lookup from class id to the name that is printed.
    index: position of the sample to show.
    """
    class_id = y[index]
    print(label[class_id])
    # The image is cast to uint8 before it is handed to imshow.
    pixels = x[index].astype(np.uint8)
    plt.imshow(pixels)
    plt.show()
OpenCVを使用する。
url = 'https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png' request.urlretrieve(url, 'lenna.png')
img = cv2.imread('lenna.png') print(type(img)) print(img.shape) plt.imshow(img) # OpenCVはBGR解釈なので青みがかる。
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # switch to RGB channel order
plt.imshow(img)  # now displayed with the original colours
# cv2.imwrite interprets the array as BGR, so convert back before saving;
# writing the RGB array directly stores the file with red and blue swapped.
cv2.imwrite('new_lenna.jpg', cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
def diff_image_info(img1, img2):
    """Print the shapes of two images and show them side by side.

    img1 is drawn on the left and img2 on the right of a 1x2 subplot grid.
    """
    print(img1.shape)
    print(img2.shape)
    for position, image in enumerate((img1, img2), start=1):
        plt.subplot(1, 2, position)
        # cmap='gray' only affects single-channel images (matplotlib ignores
        # cmap for RGB data). Without it, grayscale and binary results are
        # rendered with the default false-colour map.
        plt.imshow(image, cmap='gray')
img2 = cv2.resize(img, (224, 224)) diff_image_info(img, img2)
img2 = img[100:400,100:400,:] diff_image_info(img, img2)
h, w, c = img.shape img2 = img[:, int(w * (1/5)): int(w *(4/5)), :] diff_image_info(img, img2)
grayed = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) diff_image_info(img, grayed)
th, binary = cv2.threshold(grayed, 125, 255, cv2.THRESH_BINARY) diff_image_info(grayed, binary)
blurred = cv2.GaussianBlur(binary, (11, 11), 0) diff_image_info(binary, blurred)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)) clahed = clahe.apply(grayed) diff_image_info(grayed, clahed)
# Start from `img`: the comparison below is against `img`, and no `x` is
# defined in this OpenCV section.
img2 = img.astype('f')
img2 /= 255             # normalize to 0-1
img2 -= np.mean(img2)   # subtract the mean
diff_image_info(img, img2)
# Note from the original page: the same steps can be applied to a whole
# array x of images instead of image by image.
# Min-max normalization: map the darkest pixel to 0 and the brightest to 1.
img2 = img.astype('f')
lowest, highest = img2.min(), img2.max()
img2 -= lowest  # subtract the minimum
# Divide by the value range (max - min). Dividing by the original maximum
# only reaches 1.0 when the minimum is 0.
if highest > lowest:  # a constant image has no range to scale by
    img2 /= (highest - lowest)
diff_image_info(img, img2)
# Note from the original page: the same steps can be applied to a whole
# array x of images instead of image by image.
flipped = cv2.flip(img, 1) diff_image_info(img, flipped)
def opencv_rotate(img, angle=30):
    """Rotate `img` by `angle` degrees around its centre, keeping its size.

    img:   image array whose first two axes are (height, width).
    angle: rotation angle in degrees, passed to cv2.getRotationMatrix2D.
    """
    height, width = img.shape[:2]
    # OpenCV takes the centre as (x, y) and the output size as
    # (width, height). Passing (height, width) for both only works for
    # square images.
    center = (width // 2, height // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(img, rotation_matrix, (width, height))

# - Run the rotation (heading of the next step on the original page)
rotated = opencv_rotate(img, 30) diff_image_info(img, rotated)
def opencv_move(img, h=100, v=50):
    """Translate `img` by `h` pixels horizontally and `v` pixels vertically.

    The output has the same size as the input.
    """
    # Only the first two axes are needed; slicing also accepts grayscale
    # (2-D) images, which a three-way unpack rejects.
    rows, cols = img.shape[:2]
    # 2x3 affine matrix of a pure translation.
    M = np.float32([[1,0,h],[0,1,v]])
    return cv2.warpAffine(img, M, (cols, rows))

# - Run the translation (heading of the next step on the original page)
moved = opencv_move(img, 200, 100) diff_image_info(img, moved)
def opencv_zoomin(img, h=2.0, v=2.0):
    """Enlarge `img` by factors `h` (x) and `v` (y) and crop the centre.

    The returned crop has the height and width of the input image.
    NOTE(review): factors below 1 make the crop offsets negative; the
    function appears to be intended for h, v >= 1 only - confirm.
    """
    zoomed = cv2.resize(img, None, fx=h, fy=v)
    # Slice the shapes so grayscale (2-D) images are accepted as well.
    height_1, width_1 = img.shape[:2]
    height_2, width_2 = zoomed.shape[:2]
    # Top-left corner of the centred crop window.
    x = int((width_2 - width_1) / 2)
    y = int((height_2 - height_1) / 2)
    return zoomed[y:y+height_1, x:x+width_1]

# - Run the zoom (heading of the next step on the original page)
zoomed = opencv_zoomin(img, 2, 3) diff_image_info(img, zoomed)
def opencv_gamma(img, gamma=0.5):
    """Apply gamma conversion to `img` through a 256-entry lookup table.

    Entry i of the table is 255 * (i / 255) ** (1 / gamma), stored as uint8.
    """
    exponent = 1.0 / gamma
    levels = [255 * pow(float(i) / 255, exponent) for i in range(256)]
    # Same (256, 1) uint8 table as filling a zero array entry by entry.
    look_up_table = np.array(levels).astype('uint8').reshape(256, 1)
    return cv2.LUT(img, look_up_table)

# - Gamma conversion (heading of the next step on the original page)
img_gamma = opencv_gamma(img, 0.3) diff_image_info(img, img_gamma)
def opencv_gaussian(img, loc=0.0, scale=5.0):
    """Add Gaussian noise to `img` and return it as floats in 0..1.

    img:   image array with pixel values on the 0-255 scale.
    loc:   mean of the noise, passed to np.random.normal.
    scale: standard deviation of the noise, passed to np.random.normal.

    Returns a float array of the same shape as `img`.
    """
    # Draw the noise directly in the image's shape. This also accepts
    # grayscale (2-D) input and makes a separate reshape unnecessary.
    noise = np.random.normal(loc, scale, img.shape)
    noised = (img + noise) / 255
    # Noise can push values outside 0..1; clip so the result is a valid
    # float image.
    return np.clip(noised, 0.0, 1.0)

# - Heading of the next step on the original page. It reads "gamma
#   conversion" there, but the step that follows applies this noise function.
img_gaussian = opencv_gaussian(img, 50, 100) diff_image_info(img, img_gaussian)
def get_changed(img):
    """Outline of an image-conversion pipeline ending with an added axis.

    NOTE(review): the three processing steps are elided with `...` on the
    page, so `blurred` is not assigned anywhere in the visible code;
    presumably it is the result of the smoothing step - confirm before use.
    """
    # Grayscale conversion
    ...
    # Histogram equalization
    ...
    # Smoothing
    ...
    # When the image is no longer colour it has one dimension fewer,
    # so a trailing axis is added back.
    return blurred[:,:,np.newaxis]
def get_augmented(img):
    """Return `img` after a random horizontal flip and/or rotation.

    Each of the two transforms is applied when np.random.rand() > 0.5.
    The rotation angle is an integer drawn from -45..+45 degrees.
    """
    # Horizontal flip
    if np.random.rand() > 0.5:
        img = cv2.flip(img, 1)
    # Rotation to the left or right
    if np.random.rand() > 0.5:
        height, width = img.shape[:2]
        # OpenCV takes the centre as (x, y) and the output size as
        # (width, height). (height, width) only works for square images.
        center = (width // 2, height // 2)
        # randint excludes the upper bound, so 46 is needed to reach +45.
        angle = np.random.randint(-45, 46)
        rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
        img = cv2.warpAffine(img, rotation_matrix, (width, height))
    return img


plt.imshow(get_augmented(x_train[1300]).astype(np.uint8))
inputs = VGG16(weights='imagenet', include_top=False, input_tensor=Input(shape=(32, 32, 3))) # inputs.output_shape => (None, 1, 1, 512)
n_class = 5 output = Sequential() output.add(Flatten(input_shape=inputs.output_shape[1:])) output.add(Dense(256)) output.add(Activation('relu')) output.add(Dropout(0.5)) output.add(Dense(n_class)) output.add(Activation('softmax'))
# Join the VGG16 base with the new classifier head `output`.
# The base network is bound to the name `inputs` where it is created;
# `base_model` is never defined on this page.
model = Model(inputs=inputs.input, outputs=output(inputs.output))
# Freeze the first 15 layers so that their weights are not updated.
for layer in model.layers[:15]:
    layer.trainable = False

# `learning_rate` is the current keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
model.compile(loss='categorical_crossentropy',
              optimizer=SGD(learning_rate=0.0001),
              metrics=['accuracy'])
model.summary()
def preprocess_vgg16(img):
    """Resize an RGB image to 32x32 and apply VGG16 mean subtraction.

    Returns a float32 array of shape (32, 32, 3) in BGR channel order with
    the per-channel means (103.939, 116.779, 123.68) subtracted.
    """
    # Resize, then switch to float: in-place subtraction of a float from a
    # uint8 array is rejected by NumPy.
    resized = cv2.resize(img, (32, 32)).astype(np.float32)
    # The three constants are the means in B, G, R order, so reverse the
    # channel axis of the RGB input before subtracting (mean subtraction).
    # NOTE(review): assumes `img` is RGB, as the original comment states.
    bgr_means = np.array([103.939, 116.779, 123.68], dtype=np.float32)
    return resized[:, :, ::-1] - bgr_means
# Run every image through conversion -> augmentation -> VGG16 preprocessing
# and stack the results into arrays.
# NOTE(review): the random augmentation is applied to the test images as
# well - confirm that this is intended.
x_train_list = [preprocess_vgg16(get_augmented(get_changed(img))) for img in x_train]
x_train_aug = np.array(x_train_list)

x_test_list = [preprocess_vgg16(get_augmented(get_changed(img))) for img in x_test]
x_test_aug = np.array(x_test_list)
batch_size = 100 n_epoch = 1 # 試用なので回数を減らす
https://github.com/AileenNielsen/TimeSeriesAnalysisWithPython/blob/master/data/AirPassengers.csv
import os
from urllib import request

url = 'https://raw.githubusercontent.com/AileenNielsen/TimeSeriesAnalysisWithPython/master/data/AirPassengers.csv'
# urlretrieve opens the target path for writing and fails when the
# directory is missing, so create ./work first (no-op if it exists).
os.makedirs('./work', exist_ok=True)
request.urlretrieve(url, './work/AirPassengers.csv')
df = pd.read_csv('./work/AirPassengers.csv')
df.head()
df.tail()
df.columns = ['Month', 'Passengers']
plt.plot(df['Passengers']) plt.xticks(np.arange(0, 145, 12)) plt.grid() plt.show()
from statsmodels.tsa.seasonal import seasonal_decompose sd = seasonal_decompose(df['Passengers'].values, period=12) # periodで周期を指定 sd.plot() plt.show()
data = df['Passengers'].values.astype('f') scale = data.max() data /= scale
x = data[:-1] y = data[1:] print('x:',len(x)) print('y:', len(y))
print('x:', np.shape(x), ' y:', np.shape(y)) x = x.reshape(len(x), 1, 1) y = y.reshape(len(y), 1) print('x:', np.shape(x), ' y:', np.shape(y))
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 0) print('X_train:', x_train.shape) print('X_test :', x_test.shape) print('y_train:', y_train.shape) print('y_test :', y_test.shape)
model = Sequential() model.add(LSTM(30, batch_input_shape=(None, 1, 1))) # 中間層が30のLSTM model.add(Dense(1)) # 回帰なので最後の出力値は1つ
model.compile(loss='mean_squared_error', optimizer=Adam())
model.summary()
batch_size = 20 n_epoch = 200 hist = model.fit(x_train, y_train, epochs=n_epoch, validation_data=(x_test, y_test), verbose=0, batch_size=batch_size)
y_pred = model.predict(x)
plt.plot(data, color='blue')  # measured values
# x is data[:-1] and the target is data[1:], so y_pred[i] is the forecast
# for data[i + 1]. Plot it from position 1 so both curves line up in time.
plt.plot(np.arange(1, len(data)), y_pred, color='red')  # predicted values
plt.show()
def plot_history_loss(hist):
    """Plot the training and validation loss stored in `hist.history`."""
    curves = {
        'loss': "loss for training",
        'val_loss': "loss for validation",
    }
    for key, caption in curves.items():
        plt.plot(hist.history[key], label=caption)
    plt.title('model loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(loc='best')
    plt.show()


plot_history_loss(hist)
pickleのシリアライズとデシリアライズで保存と復元
import pickle obj = '保存されるオブジェクト'
with open('sample.pickle','wb') as f: pickle.dump(obj, f)
with open('sample.pickle','rb') as f: loaded_obj = pickle.load(f)
print(loaded_obj)
# Optimizer examples. `learning_rate` is used instead of the deprecated
# `lr` keyword.
# Plain SGD
optimizer = optimizers.SGD(learning_rate=0.01)
# SGD with momentum
optimizer = optimizers.SGD(learning_rate=0.01, momentum=0.9)
# SGD with Nesterov momentum
optimizer = optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
# Adagrad. epsilon=None and decay=0.0 only restated the defaults, and newer
# Keras releases no longer accept `decay`, so they are omitted.
optimizer = optimizers.Adagrad(learning_rate=0.01)
商品の自動タグ付けのモデル定義に以下を追加。
es = EarlyStopping(monitor='val_loss', patience=5, mode='min', verbose=0) hist = model.fit(x_train_std , y_train, validation_data=(x_test_std, y_test), batch_size=batch_size, epochs=n_epoch, verbose=1, callbacks=[es]) # EarlyStoppingを適用
mc = ModelCheckpoint(monitor='val_loss', mode='min', verbose=1, filepath='./dl4', save_best_only=True) hist = model.fit(x_train_std , y_train, validation_data=(x_test_std, y_test), batch_size=batch_size, epochs=n_epoch, verbose=1, callbacks=[mc]) # ModelCheckpointを適用
# 無効化比率0.5のDropout model.add(Dropout(rate=0.5))
model.add(BatchNormalization())
# モデルはjson形式 json_string = model.to_json() with open('mnist.model', 'w') as f: f.write(json_string) # パラメータはhdf5形式 model.save_weights('param.hdf5')
# モデルはjson形式 with open('mnist.model', 'r') as f: json_string = f.read() model = model_from_json(json_string) # パラメータはhdf5形式 model.load_weights('param.hdf5')