A while ago I studied BERT. Since then I have tried a few of its derivatives, so I am leaving the code here as a personal reference.
Introduction
If you already know exactly which model you want to use, the style below is fine, but the library also provides wrapper classes such as AutoModel that can load any supported model by name, and I plan to write those up separately at some point.
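For reference, here is a minimal sketch of what those Auto* classes look like, using the same distilbert-base-uncased checkpoint as in the example below (this is only an illustration, not part of the original setup):

from transformers import AutoTokenizer, TFAutoModel

# The Auto* classes pick the right tokenizer/model implementation from the
# checkpoint name, so the same two lines work for distilbert, roberta, deberta, etc.
auto_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
auto_model = TFAutoModel.from_pretrained("distilbert-base-uncased")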
DistilBERT
Tokenizer
import numpy as np
import tensorflow as tf
from transformers import DistilBertTokenizer, TFDistilBertModel

MODEL_NAME = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(MODEL_NAME)
# Convert a list of texts into the model's input format
def to_features(texts, max_length):
    shape = (len(texts), max_length)
    # input_ids (attention_mask / token_type_ids could be built the same way)
    input_ids = np.zeros(shape, dtype="int32")
    # attention_mask = np.zeros(shape, dtype="int32")
    for i, text in enumerate(texts):
        encoded_dict = tokenizer.encode_plus(
            text, max_length=max_length, padding="max_length", truncation=True)
        input_ids[i] = encoded_dict["input_ids"]
        # attention_mask[i] = encoded_dict["attention_mask"]
    return [tf.cast(input_ids, tf.int32)]
Applying it to the data
max_length = 500
x_train = to_features(train_texts, max_length)
y_train = tf.cast(train_labels, tf.int32)
x_valid = to_features(valid_texts, max_length)
y_valid = tf.cast(valid_labels, tf.int32)
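Here train_texts, valid_texts, train_labels, and valid_labels are assumed to already exist as lists of strings and integer class ids (0-3). As one possible way to prepare them, a sketch using a hypothetical CSV file with text and label columns:

import pandas as pd
from sklearn.model_selection import train_test_split

# Hypothetical file and column names; adjust to your own dataset.
df = pd.read_csv("data.csv")
train_texts, valid_texts, train_labels, valid_labels = train_test_split(
    df["text"].tolist(), df["label"].tolist(), test_size=0.2, random_state=42)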
Example of building a classification model
from tensorflow import keras
from tensorflow.keras import optimizers, losses, metrics
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

def build_model(transformer, max_len=512):
    input_word_ids = Input(shape=(max_len,), dtype="int32", name="input_ids")
    sequence_output = transformer(input_word_ids)[0]
    ## Use the features of the first token [CLS] for multi-class classification
    cls_token = sequence_output[:, 0, :]
    out = Dense(128, activation='relu')(cls_token)
    out = Dense(4, activation='softmax')(out)
    model = Model(inputs=input_word_ids, outputs=out)
    adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
    model.compile(optimizer=adam,
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])
    return model
transformer_layer = TFDistilBertModel.from_pretrained(MODEL_NAME)
# To freeze the transformer layers (exclude them from training), uncomment:
# transformer_layer.trainable = False
model = build_model(transformer_layer, max_len=500)
model.summary()
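EarlyStopping and ModelCheckpoint are already imported above, so as a rough sketch, training might look like this (batch size, epoch count, and the checkpoint file name are illustrative, not values from the original post):

callbacks = [
    EarlyStopping(monitor="val_loss", patience=2, restore_best_weights=True),
    ModelCheckpoint("distilbert_clf.h5", monitor="val_loss",
                    save_best_only=True, save_weights_only=True),
]
history = model.fit(x_train, y_train,
                    validation_data=(x_valid, y_valid),
                    batch_size=16, epochs=5,
                    callbacks=callbacks)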
RoBERTa
Tokenizer
import numpy as np
import tensorflow as tf
from transformers import RobertaTokenizer, DebertaV2Tokenizer

# MDL_PATH = "microsoft/deberta-v3-base"
MDL_PATH = "roberta-base"
tokenizer = RobertaTokenizer.from_pretrained(MDL_PATH)
# Convert a list of texts into the model's input format
def to_features(texts, max_length):
    shape = (len(texts), max_length)
    # input_ids and attention_mask (token_type_ids are not used here)
    input_ids = np.zeros(shape, dtype="int32")
    attention_mask = np.zeros(shape, dtype="int32")
    for i, text in enumerate(texts):
        encoded_dict = tokenizer.encode_plus(
            text, max_length=max_length, padding="max_length", truncation=True)
        input_ids[i] = encoded_dict["input_ids"]
        attention_mask[i] = encoded_dict["attention_mask"]
    return [tf.cast(input_ids, tf.int32), tf.cast(attention_mask, tf.int32)]
Applying it to the data
max_length = 500
x_train = to_features(train_texts, max_length)
y_train = tf.cast(train_labels, tf.int32)
x_valid = to_features(valid_texts, max_length)
y_valid = tf.cast(valid_labels, tf.int32)
Example classifier
from tensorflow import keras
from tensorflow.keras import optimizers, losses, metrics
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, Conv1D, GlobalMaxPooling1D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

filters = 250     # number of output channels
kernel_size = 3   # convolution window size

def build_model(roberta_model, max_len=512):
    input_ids = Input(shape=(max_len,), dtype='int32', name='input_ids')
    attention_mask = Input(shape=(max_len,), dtype='int32', name='attention_mask')
    sequence_output = roberta_model(input_ids=input_ids, attention_mask=attention_mask)[0]
    ## Alternatively, use the features of the first token for multi-class classification
    # cls_token = sequence_output[:, 0, :]
    # document_encodings = tf.squeeze(cls_token, axis=1)
    out = Conv1D(filters, kernel_size, padding="valid", activation="relu", strides=1)(sequence_output)
    out = GlobalMaxPooling1D()(out)
    out = Dense(512, activation='relu')(out)
    # out = Dense(256, activation='relu')(out)
    # out = Dense(128, activation='relu')(out)
    out = Dropout(0.2)(out)
    out = Dense(4, activation='softmax')(out)
    model = Model(inputs=[input_ids, attention_mask], outputs=out)
    adam = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, amsgrad=False)
    model.compile(optimizer=adam,
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])
    return model
from transformers import TFAutoModel, TFRobertaModel, TFDebertaV2Model

roberta_model = TFRobertaModel.from_pretrained(MDL_PATH)
# To freeze the transformer layers (exclude them from training), uncomment:
# roberta_model.trainable = False
model = build_model(roberta_model, max_len=500)
model.summary()
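Training works the same way as in the DistilBERT example, except that x_train and x_valid are now the two-element list [input_ids, attention_mask]. As a small sketch of reading off predictions from the softmax output (argmax over the 4 classes; the batch size is illustrative):

probs = model.predict(x_valid, batch_size=16)   # shape (n_samples, 4): class probabilities
pred_labels = np.argmax(probs, axis=-1)         # predicted class id per sample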
Summary
This post is mostly just code, but I hope it is useful to someone. I will also come back to this article whenever I forget how to write this myself.
Related article: TransformerとBERTの使い方(テキスト分類用コードまとめ)
It gives a concise overview of Transformer and BERT, with code included so that you can simply copy and paste it even without understanding the underlying theory. Please give it a read!