
Optimizing LSTM with embedding matrix - TypeError: module, class, method, function, traceback, frame, or code object was expected, got tuple #262

Open
jlperezg opened this issue Nov 27, 2019 · 1 comment

@jlperezg

First of all, thank you very much for your work.

I'm trying to use your framework to optimize the hyperparameters of my LSTM network in order to implement a sentiment analysis classifier.

I used a snippet you posted but I cannot make it work. I think the main issue is how to compute the embedding_matrix (I'm using word embeddings) used to train the network. I trained the tokenizer separately to obtain the weights file.
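For context, the recipe I'm following in Generate_Matrix is (I believe) the standard one: one matrix row per tokenizer index, filled from word2vec. A simplified sketch, with illustrative names:

import numpy as np

def build_embedding_matrix(tokenizer, w2v, max_nb_words, embedding_dim):
    # Row i holds the word2vec vector for the word with tokenizer index i;
    # words missing from word2vec keep the all-zeros row.
    matrix = np.zeros((max_nb_words, embedding_dim))
    for word, i in tokenizer.word_index.items():
        if i < max_nb_words and word in w2v:
            matrix[i] = w2v[word]
    return matrix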

I'm getting the following error:

Hyperas search space:

def get_space():
    return {
        'Dropout': hp.uniform('Dropout', 0, 1),
        'optimizer': hp.choice('optimizer', ['rmsprop', 'adam', 'sgd']),
    }

Traceback (most recent call last):
  File "optim_keras.py", line 132, in <module>
    best_run = optim.minimize(model=keras_model,data=get_data(),algo=tpe.suggest,max_evals=10,trials=Trials())
  File "/usr/bin/anaconda3/lib/python3.7/site-packages/hyperas/optim.py", line 69, in minimize
    keep_temp=keep_temp)
  File "/usr/bin/anaconda3/lib/python3.7/site-packages/hyperas/optim.py", line 98, in base_minimizer
    model_str = get_hyperopt_model_string(model, data, functions, notebook_name, verbose, stack)
  File "/usr/bin/anaconda3/lib/python3.7/site-packages/hyperas/optim.py", line 198, in get_hyperopt_model_string
    data_string = retrieve_data_string(data, verbose)
  File "/usr/bin/anaconda3/lib/python3.7/site-packages/hyperas/optim.py", line 219, in retrieve_data_string
    data_string = inspect.getsource(data)
  File "/usr/bin/anaconda3/lib/python3.7/inspect.py", line 973, in getsource
    lines, lnum = getsourcelines(object)
  File "/usr/bin/anaconda3/lib/python3.7/inspect.py", line 955, in getsourcelines
    lines, lnum = findsource(object)
  File "/usr/bin/anaconda3/lib/python3.7/inspect.py", line 768, in findsource
    file = getsourcefile(object)
  File "/usr/bin/anaconda3/lib/python3.7/inspect.py", line 684, in getsourcefile
    filename = getfile(object)
  File "/usr/bin/anaconda3/lib/python3.7/inspect.py", line 666, in getfile
    type(object).__name__))
TypeError: module, class, method, function, traceback, frame, or code object was expected, got tuple
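
Looking at the traceback, the failure happens in inspect.getsource(data), which receives the tuple returned by get_data(). If I read the hyperas examples correctly, minimize expects the data function object itself rather than its return value, so I suspect the call should look like this (untested):

best_run = optim.minimize(model=keras_model,
                          data=get_data,  # the function itself, not get_data()
                          algo=tpe.suggest,
                          max_evals=10,
                          trials=Trials())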

Thank you in advance for your help.

Here's my code:


from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform



def get_data():
  import pickle
  from keras.preprocessing import sequence
  from keras.models import Sequential
  from keras.layers.core import Dense, Dropout, Activation
  from keras.layers.embeddings import Embedding
  from keras.layers.recurrent import LSTM
  from keras.datasets import imdb
  from keras.callbacks import EarlyStopping, ModelCheckpoint
  from keras.preprocessing.sequence import pad_sequences
  from keras.utils import to_categorical
  from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
  import classes.filtros as NT
  import classes.data_processing as DP
  import classes.embeddings as EB
  import classes.model as M
  import classes.token as T
  import classes.parameters as Params
  import pandas as pd
  import numpy as np
  import sys
  import time 

  # Load the data
  print('Loading data...')
  # Instantiate the required helper objects
  text_array = NT.NormalizeText()
  data_processing = DP.DataProcessing()
  word_embedding = EB.ProcessEmbeddings()
  w2v = word_embedding.get_word2vec(Params.W2V_FILE)
  data_set = data_processing.load_data()

  # Split the rows by class to get a balanced training set
  neutros = [row for row in data_set if row[2] == 0]
  positiv = [row for row in data_set if row[2] == 1]
  negativ = [row for row in data_set if row[2] == -1]


  df_neutros = pd.DataFrame.from_records(neutros)
  df_positiv = pd.DataFrame.from_records(positiv)
  df_negativ = pd.DataFrame.from_records(negativ)

  # Truncate every class to the size of the smallest one
  minimo = np.min([len(df_neutros), len(df_positiv), len(df_negativ)])
  df_final = pd.concat([df_neutros[:minimo], df_positiv[:minimo], df_negativ[:minimo]], ignore_index=True)

  # Load the previously trained tokenizer
  token_path = './models/Tokenizer.pkl'
  t_m = T.TokenizerModel()
  with open(token_path, 'rb') as f:
      t_m.t = pickle.load(f)


  # Clean the text and build the token list
  filtered = pd.DataFrame(columns=['textos'])
  for row in df_final.itertuples():
    texto_filt = word_embedding.clean_text(row._2)
    filtered.loc[row.Index] = texto_filt

  encoded_docs = t_m.t.texts_to_sequences(filtered['textos'])
  
  # Encode and pad the input documents
  X = pad_sequences(encoded_docs, maxlen=Params.MAX_SEQUENCE_LENGTH, padding='post')
  y = df_final[2]
  # Split into train and test
  sss = StratifiedShuffleSplit(n_splits=15, test_size=0.15)

  for train_index, test_index in sss.split(X, y):
      X_train, X_test = X[train_index], X[test_index]
      Y_train, Y_test = y[train_index], y[test_index]

  # Labels must be converted to categorical because of the loss function used for training
  y_train_bin = to_categorical(Y_train, num_classes=3, dtype='int32')
  y_test_bin = to_categorical(Y_test, num_classes=3, dtype='int32')

  return X_train, y_train_bin, X_test, y_test_bin
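
# Note: the StratifiedShuffleSplit loop above overwrites the split variables
# on each iteration, so only the last of the 15 splits is actually used.
# If a single stratified split is intended, this sketch would be more direct:
#
#   sss = StratifiedShuffleSplit(n_splits=1, test_size=0.15)
#   train_index, test_index = next(sss.split(X, y))
#   X_train, X_test = X[train_index], X[test_index]
#   Y_train, Y_test = y[train_index], y[test_index]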


def keras_model(X_train, y_train_bin, X_test, y_test_bin):
  # Hyperas executes this function from a generated script, so the imports
  # it needs are made inside the function to keep it self-contained.
  import pickle
  from keras.models import Sequential
  from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
  from keras.callbacks import EarlyStopping, ModelCheckpoint
  import classes.filtros as NT
  import classes.embeddings as EB
  import classes.token as T
  import classes.parameters as Params

  # Model definition and training
  word_embedding = EB.ProcessEmbeddings()
  w2v = word_embedding.get_word2vec(Params.W2V_FILE)
  text_array = NT.NormalizeText()
  # Load the previously trained tokenizer
  token_path = './models/Tokenizer.pkl'
  t_m = T.TokenizerModel()
  with open(token_path, 'rb') as f:
      t_m.t = pickle.load(f)

  # Build the embedding matrix (vocab_size, tokenizer, w2v, text_array)
  embedding_matrix = word_embedding.Generate_Matrix(Params.MAX_NB_WORDS, t_m.t, w2v, text_array)

  print('Build model...')
  # LSTM model
  model = Sequential()
  model.add(Embedding(Params.MAX_NB_WORDS, output_dim=Params.EMBEDDING_DIM, input_length=Params.MAX_SEQUENCE_LENGTH, weights=[embedding_matrix], trainable=False))
  model.add(Bidirectional(LSTM(Params.LSTM_UNITS_1ST, return_sequences=False)))
  model.add(Dropout({{uniform(0, 1)}}))
  model.add(Dense(3, activation='softmax'))

  model.compile(optimizer={{choice(['rmsprop', 'adam', 'sgd'])}}, loss='categorical_crossentropy', metrics=['categorical_accuracy'])



  early_stopping = EarlyStopping(monitor='val_loss', patience=4)
  checkpointer = ModelCheckpoint(filepath='keras_weights.hdf5',
                                 verbose=1,
                                 save_best_only=True)

  # Keras 2 uses epochs instead of nb_epoch; show_accuracy was removed,
  # accuracy now comes from the compiled metrics.
  hist = model.fit(X_train, y_train_bin,
                   epochs=1,
                   validation_split=0.08,
                   callbacks=[early_stopping, checkpointer])

  score, acc = model.evaluate(X_test, y_test_bin, verbose=0)

  print('Test accuracy:', acc)
  return {'loss': -acc, 'status': STATUS_OK}



if __name__ == '__main__':
    best_run = optim.minimize(model=keras_model,data=get_data(),algo=tpe.suggest,max_evals=10,trials=Trials())
    print(best_run)
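
In case it's relevant: if I read the hyperas README correctly, the model function usually also returns the fitted model in the result dict so the best model can be retrieved after the search, i.e. something like:

  return {'loss': -acc, 'status': STATUS_OK, 'model': model}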




@marianyamukuru

Did you figure it out? I'm getting a similar error.
