diff --git a/AttentionLayer.py b/AttentionLayer.py
new file mode 100644
index 0000000..063abd4
--- /dev/null
+++ b/AttentionLayer.py
@@ -0,0 +1,122 @@
+import tensorflow as tf
+from tensorflow.python.keras.layers import Layer
+from tensorflow.python.keras import backend as K
+
+
+class AttentionLayer(Layer):
+    """
+    Implements Bahdanau (additive) attention (https://arxiv.org/pdf/1409.0473.pdf).
+    Introduces three trainable weight matrices: W_a, U_a, and V_a.
+    """
+
+ def __init__(self, **kwargs):
+ super(AttentionLayer, self).__init__(**kwargs)
+
+ def build(self, input_shape):
+ assert isinstance(input_shape, list)
+ # Create a trainable weight variable for this layer.
+
+ self.W_a = self.add_weight(name='W_a',
+ shape=tf.TensorShape((input_shape[0][2], input_shape[0][2])),
+ initializer='uniform',
+ trainable=True)
+ self.U_a = self.add_weight(name='U_a',
+ shape=tf.TensorShape((input_shape[1][2], input_shape[0][2])),
+ initializer='uniform',
+ trainable=True)
+ self.V_a = self.add_weight(name='V_a',
+ shape=tf.TensorShape((input_shape[0][2], 1)),
+ initializer='uniform',
+ trainable=True)
+
+ super(AttentionLayer, self).build(input_shape) # Be sure to call this at the end
+
+ def call(self, inputs, verbose=False):
+        """
+        inputs: [encoder_output_sequence, decoder_output_sequence]
+        Returns context vectors (batch_size, de_seq_len, latent_dim) and attention
+        weights (batch_size, de_seq_len, en_seq_len).
+        """
+ assert type(inputs) == list
+ encoder_out_seq, decoder_out_seq = inputs
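+        # encoder_out_seq: (batch_size, en_seq_len, latent_dim)
+        # decoder_out_seq: (batch_size, de_seq_len, latent_dim)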
+ if verbose:
+ print('encoder_out_seq>', encoder_out_seq.shape)
+ print('decoder_out_seq>', decoder_out_seq.shape)
+
+ def energy_step(inputs, states):
+            """ Step function computing attention energies of one decoder state
+            against all encoder states: softmax(V_a^T tanh(S.W_a + h_j.U_a)) """
+
+ assert_msg = "States must be a list. However states {} is of type {}".format(states, type(states))
+ assert isinstance(states, list) or isinstance(states, tuple), assert_msg
+
+ """ Some parameters required for shaping tensors"""
+ en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
+ de_hidden = inputs.shape[-1]
+
+ """ Computing S.Wa where S=[s0, s1, ..., si]"""
+ # <= batch_size*en_seq_len, latent_dim
+ reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
+            # <= batch_size, en_seq_len, latent_dim
+            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a), (-1, en_seq_len, en_hidden))
+ if verbose:
+ print('wa.s>',W_a_dot_s.shape)
+
+ """ Computing hj.Ua """
+ U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1) # <= batch_size, 1, latent_dim
+ if verbose:
+ print('Ua.h>',U_a_dot_h.shape)
+
+ """ tanh(S.Wa + hj.Ua) """
+ # <= batch_size*en_seq_len, latent_dim
+ reshaped_Ws_plus_Uh = K.tanh(K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
+ if verbose:
+ print('Ws+Uh>', reshaped_Ws_plus_Uh.shape)
+
+ """ softmax(va.tanh(S.Wa + hj.Ua)) """
+ # <= batch_size, en_seq_len
+ e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a), (-1, en_seq_len))
+ # <= batch_size, en_seq_len
+ e_i = K.softmax(e_i)
+
+ if verbose:
+ print('ei>', e_i.shape)
+
+ return e_i, [e_i]
+
+ def context_step(inputs, states):
+ """ Step function for computing ci using ei """
+ # <= batch_size, hidden_size
+ c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
+ if verbose:
+ print('ci>', c_i.shape)
+ return c_i, [c_i]
+
+        def create_initial_state(inputs, hidden_size):
+            # We are not using initial states, but need to pass something to the K.rnn function
+            fake_state = K.zeros_like(inputs)  # <= (batch_size, enc_seq_len, latent_dim)
+            fake_state = K.sum(fake_state, axis=[1, 2])  # <= (batch_size,)
+            fake_state = K.expand_dims(fake_state)  # <= (batch_size, 1)
+            fake_state = K.tile(fake_state, [1, hidden_size])  # <= (batch_size, hidden_size)
+            return fake_state
+
+        fake_state_c = create_initial_state(encoder_out_seq, encoder_out_seq.shape[-1])  # <= (batch_size, latent_dim)
+        fake_state_e = create_initial_state(encoder_out_seq, encoder_out_seq.shape[1])  # <= (batch_size, enc_seq_len)
+
+ """ Computing energy outputs """
+ # e_outputs => (batch_size, de_seq_len, en_seq_len)
+ last_out, e_outputs, _ = K.rnn(
+ energy_step, decoder_out_seq, [fake_state_e],
+ )
+
+ """ Computing context vectors """
+ last_out, c_outputs, _ = K.rnn(
+ context_step, e_outputs, [fake_state_c],
+ )
+
+ return c_outputs, e_outputs
+
+ def compute_output_shape(self, input_shape):
+        """ Shapes of the two outputs: context vectors and attention weights """
+ return [
+ tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[1][2])),
+ tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][1]))
+ ]
\ No newline at end of file
diff --git a/build_model_keras.py b/build_model_keras.py
index b9efa8d..84768a9 100644
--- a/build_model_keras.py
+++ b/build_model_keras.py
@@ -1,37 +1,46 @@
-#building the model keras---an attempt to build a seq2seq model in keras
from data_clean import *
+from keras import backend as K
+K.clear_session()
+latent_dim = 300
+embedding_dim=100
-def define_model(max_text_length,max_summary_length,n_units):
- dim_rep=300
+# Encoder
+encoder_inputs = Input(shape=(max_text_len,))
+#embedding layer
+enc_emb = Embedding(x_voc, embedding_dim,trainable=True)(encoder_inputs)
- encoder_inputs=Input(shape=(None,max_text_length,dim_rep))
+#encoder lstm 1
+encoder_lstm1 = LSTM(latent_dim,return_sequences=True,return_state=True,dropout=0.4,recurrent_dropout=0.4)
+encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)
- encoder=LSTM(n_units,return_state=True)
- encoder_outputs, state_h, state_c = encoder(encoder_inputs)
- encoder_states = [state_h, state_c]
+#encoder lstm 2
+encoder_lstm2 = LSTM(latent_dim,return_sequences=True,return_state=True,dropout=0.4,recurrent_dropout=0.4)
+encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)
- #define training decoder
- decoder_inputs = Input(shape=(None, max_summary_length,dim_rep))
+#encoder lstm 3
+encoder_lstm3=LSTM(latent_dim, return_state=True, return_sequences=True,dropout=0.4,recurrent_dropout=0.4)
+encoder_outputs, state_h, state_c= encoder_lstm3(encoder_output2)
- decoder_lstm = LSTM(n_units, return_sequences=True, return_state=True)
- decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
- decoder_dense = Dense(n_output, activation='softmax')
- decoder_outputs = decoder_dense(decoder_outputs)
- model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
+# Set up the decoder, using `encoder_states` as initial state.
+decoder_inputs = Input(shape=(None,))
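+# shape (None,) lets the same decoder graph take full target sequences during
+# training (teacher forcing) and a single token per step during inference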
- #define inference encoder
- encoder_model = Model(encoder_inputs, encoder_states)
+#embedding layer
+dec_emb_layer = Embedding(y_voc, embedding_dim,trainable=True)
+dec_emb = dec_emb_layer(decoder_inputs)
- #define inference decoder
- decoder_state_input_h = Input(shape=(n_units,))
- decoder_state_input_c = Input(shape=(n_units,))
- decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
- decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
- decoder_states = [state_h, state_c]
- decoder_outputs = decoder_dense(decoder_outputs)
- decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)
+decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True,dropout=0.4,recurrent_dropout=0.2)
+# a plain LSTM returns its hidden and cell states (not forward/backward states)
+decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(dec_emb, initial_state=[state_h, state_c])
- return model, encoder_model, decoder_model
+
+
+#dense layer
+decoder_dense = TimeDistributed(Dense(y_voc, activation='softmax'))
+decoder_outputs = decoder_dense(decoder_outputs)
+
+# Define the model
+model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
+
+model.summary()
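+
+# One possible way to wire in the AttentionLayer defined in AttentionLayer.py
+# (left commented out because the inference script would need matching changes;
+# this follows the layer's [encoder_output_sequence, decoder_output_sequence] interface):
+# from AttentionLayer import AttentionLayer
+# attn_layer = AttentionLayer(name='attention_layer')
+# attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])
+# decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attn_out])
+# decoder_outputs = decoder_dense(decoder_concat_input)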
\ No newline at end of file
diff --git a/data_clean.py b/data_clean.py
index 1ff1a90..6a10ddc 100644
--- a/data_clean.py
+++ b/data_clean.py
@@ -1,244 +1,181 @@
from data_manip import *
-import nltk
-import tensorflow as tf
-nltk.download('stopwords')
-
-def clean_text(text, remove_stopwords = True):
-
- # Convert words to lower case
- text = text.lower()
- #[_\-;%()+&=*%.,!?:$@\[\]/]
- # Replace contractions with their longer forms
- if True:
- text = text.split()
- new_text = []
- for word in text:
- if word in contractions:
- new_text.append(contractions[word])
- else:
- new_text.append(word)
- text = " ".join(new_text)
-
- # Format words and remove unwanted characters
- text = re.sub(r'https?:\/\/.*[\r\n]*', '', text,
- flags=re.MULTILINE)
- text = re.sub(r'\', ' ', text)
- text = re.sub(r'\'', ' ', text)
-
- # Optionally, remove stop words
- if remove_stopwords:
- text = text.split()
- stops = set(stopwords.words("english"))
- text = [w for w in text if not w in stops]
- text = " ".join(text)
- return text
-
-clean_summaries = []
-for summary in reviews.Summary:
- clean_summaries.append(clean_text(summary, remove_stopwords=False))
-print("Summaries are complete.")
-
-clean_texts = []
-for text in reviews.Text:
- clean_texts.append(clean_text(text))
-print("Texts are complete.")
-
-def count_words(count_dict, text):
- '''Count the number of occurrences of each word in a set of text'''
- for sentence in text:
- for word in sentence.split():
- if word not in count_dict:
- count_dict[word] = 1
- else:
- count_dict[word] += 1
-word_counts = {}
-
-count_words(word_counts, clean_summaries)
-count_words(word_counts, clean_texts)
-
-print("Size of Vocabulary:", len(word_counts))
-
-embeddings_index = {}
-with open('/home/pbu/Downloads/numberbatch-en.txt', encoding='utf-8') as f:
- for line in f:
- values = line.split(' ')
- word = values[0]
- embedding = np.asarray(values[1:], dtype='float32')
- embeddings_index[word] = embedding
-print('Word embeddings:', len(embeddings_index))
-
-# Find the number of words that are missing from CN, and are used more than our threshold.
-missing_words = 0
-threshold = 20
-
-for word, count in word_counts.items():
- if count > threshold:
- if word not in embeddings_index:
- missing_words += 1
-
-missing_ratio = round(missing_words/len(word_counts),4)*100
-
-print("Number of words missing from CN:", missing_words)
-print("Percent of words that are missing from vocabulary: {}%".format(missing_ratio))
-
-# Limit the vocab that we will use to words that appear ≥ threshold or are in GloVe
-
-#dictionary to convert words to integers
-vocab_to_int = {}
-
-value = 0
-for word, count in word_counts.items():
- if count >= threshold or word in embeddings_index:
- vocab_to_int[word] = value
- value += 1
-
-# Special tokens that will be added to our vocab
-codes = ["","","",""]
-
-# Add codes to vocab
-for code in codes:
- vocab_to_int[code] = len(vocab_to_int)
-
-# Dictionary to convert integers to words
-int_to_vocab = {}
-for word, value in vocab_to_int.items():
- int_to_vocab[value] = word
-
-usage_ratio = round(len(vocab_to_int) / len(word_counts),4)*100
-
-print("Total number of unique words:", len(word_counts))
-print("Number of words we will use:", len(vocab_to_int))
-print("Percent of words we will use: {}%".format(usage_ratio))
-
-# Need to use 300 for embedding dimensions to match CN's vectors.
-embedding_dim = 300
-nb_words = len(vocab_to_int)
-
-# Create matrix with default values of zero
-word_embedding_matrix = np.zeros((nb_words, embedding_dim), dtype=np.float32)
-for word, i in vocab_to_int.items():
- if word in embeddings_index:
- word_embedding_matrix[i] = embeddings_index[word]
+
+stop_words = set(stopwords.words('english'))
+
+def text_cleaner(text,num):
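+    # num==0: clean a review text (stopwords removed); num==1: clean a summary (stopwords kept)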
+ newString = text.lower()
+ newString = BeautifulSoup(newString, "lxml").text
+ newString = re.sub(r'\([^)]*\)', '', newString)
+ newString = re.sub('"','', newString)
+ newString = ' '.join([contraction_mapping[t] if t in contraction_mapping else t for t in newString.split(" ")])
+ newString = re.sub(r"'s\b","",newString)
+ newString = re.sub("[^a-zA-Z]", " ", newString)
+ newString = re.sub('[m]{2,}', 'mm', newString)
+ if(num==0):
+ tokens = [w for w in newString.split() if not w in stop_words]
else:
- # If word not in CN, create a random embedding for it
- new_embedding = np.array(np.random.uniform(-1.0, 1.0, embedding_dim))
- embeddings_index[word] = new_embedding
- word_embedding_matrix[i] = new_embedding
-
-# Check if value matches len(vocab_to_int)
-print(len(word_embedding_matrix))
-
-def convert_to_ints(text, word_count, unk_count, eos=False):
- '''Convert words in text to an integer.
- If word is not in vocab_to_int, use UNK's integer.
- Total the number of words and UNKs.
- Add EOS token to the end of texts'''
- ints = []
- for sentence in text:
- sentence_ints = []
- for word in sentence.split():
- word_count += 1
- if word in vocab_to_int:
- sentence_ints.append(vocab_to_int[word])
- else:
- sentence_ints.append(vocab_to_int[""])
- unk_count += 1
- if eos:
- sentence_ints.append(vocab_to_int[""])
- ints.append(sentence_ints)
- return ints, word_count, unk_count
-# Apply convert_to_ints to clean_summaries and clean_texts
-word_count = 0
-unk_count = 0
-
-int_summaries, word_count, unk_count = convert_to_ints(clean_summaries, word_count, unk_count)
-int_texts, word_count, unk_count = convert_to_ints(clean_texts, word_count, unk_count, eos=True)
-unk_percent = round(unk_count/word_count,4)*100
-
-print("Total number of words in headlines:", word_count)
-print("Total number of UNKs in headlines:", unk_count)
-print("Percent of words that are UNK: {}%".format(unk_percent))
-
-
-def create_lengths(text):
- '''Create a data frame of the sentence lengths from a text'''
- lengths = []
- for sentence in text:
- lengths.append(len(sentence))
-
- return pd.DataFrame(lengths, columns=['counts'])
-
-lengths_summaries = create_lengths(int_summaries)
-lengths_texts = create_lengths(int_texts)
-
-print("Summaries:")
-print(lengths_summaries.describe())
-print()
-print("Texts:")
-print(lengths_texts.describe())
-
-
-def unk_counter(sentence):
- '''Counts the number of time UNK appears in a sentence.'''
- unk_count = 0
- for word in sentence:
- if word == vocab_to_int[""]:
- unk_count += 1
- return unk_count
-
-# Sort the summaries and texts by the length of the texts, shortest to longest
-# Limit the length of summaries and texts based on the min and max ranges.
-# Remove reviews that include too many UNKs
-
-sorted_summaries = []
-sorted_texts = []
-max_text_length = 84
-max_summary_length = 13
-min_length = 2
-unk_text_limit = 1
-unk_summary_limit = 0
-
-for length in range(min(lengths_texts.counts), max_text_length):
- for count, words in enumerate(int_summaries):
- if (len(int_summaries[count]) >= min_length and
- len(int_summaries[count]) <= max_summary_length and
- len(int_texts[count]) >= min_length and
- unk_counter(int_summaries[count]) <= unk_summary_limit and
- unk_counter(int_texts[count]) <= unk_text_limit and
- length == len(int_texts[count])
- ):
- sorted_summaries.append(int_summaries[count])
- sorted_texts.append(int_texts[count])
-
-# Compare lengths to ensure they match
-print(len(sorted_summaries))
-print(len(sorted_texts))
-#print(np.array(sorted_texts).shape)
-
-def pad_sentence_batch(sentence,max_length):
- """Pad sentences with so that each sentence of a batch has the same length"""
-
- return [sentence + [vocab_to_int['']] * (max_length - len(sentence))]
-
-def process_encoding_input(target_data, vocab_to_int, batch_size):
- '''Remove the last word id from each batch and concat the to the begining of each batch'''
-
- ending = tf.strided_slice(target_data, [0, 0], [batch_size, -1], [1, 1])
- dec_input = tf.concat([tf.fill([batch_size, 1], vocab_to_int['']), ending], 1)
-
- return dec_input
-
-padded_summaries=list()
-padded_text=list()
-
-for summaries in sorted_summaries:
- summaries=process_encoding_input(summaries,vocab_to_int,1)
- summaries=pad_sentence_batch(summaries,max_summary_length)
- padded_summaries.append(summaries)
-
-for text in sorted_texts:
- text=pad_sentence_batch(text,max_text_length)
- padded_text.append(text)
+ tokens=newString.split()
+ long_words=[]
+ for i in tokens:
+        if len(i)>1:  # drop one-letter words
+ long_words.append(i)
+ return (" ".join(long_words)).strip()
+
+cleaned_text = []
+for t in data['Text']:
+ cleaned_text.append(text_cleaner(t,0))
+
+cleaned_summary = []
+for t in data['Summary']:
+ cleaned_summary.append(text_cleaner(t,1))
+
+data['cleaned_text']=cleaned_text
+data['cleaned_summary']=cleaned_summary
+'''
+import matplotlib.pyplot as plt
+
+text_word_count = []
+summary_word_count = []
+
+# populate the lists with sentence lengths
+for i in data['cleaned_text']:
+ text_word_count.append(len(i.split()))
+
+for i in data['cleaned_summary']:
+ summary_word_count.append(len(i.split()))
+
+length_df = pd.DataFrame({'text':text_word_count, 'summary':summary_word_count})
+
+length_df.hist(bins = 30)
+plt.show()
+'''
+
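+# what fraction of summaries fit within 8 words? used to pick max_summary_len below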
+cnt=0
+for i in data['cleaned_summary']:
+ if(len(i.split())<=8):
+ cnt=cnt+1
+print(cnt/len(data['cleaned_summary']))
+
+max_text_len=30
+max_summary_len=8
+
+cleaned_text = np.array(data['cleaned_text'])
+cleaned_summary = np.array(data['cleaned_summary'])
+
+short_text = []
+short_summary = []
+
+for i in range(len(cleaned_text)):
+ if (len(cleaned_summary[i].split()) <= max_summary_len and len(cleaned_text[i].split()) <= max_text_len):
+ short_text.append(cleaned_text[i])
+ short_summary.append(cleaned_summary[i])
+
+df = pd.DataFrame({'text': short_text, 'summary': short_summary})
+
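+# mark the start and end of every summary so the decoder learns when to begin and stop generating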
+df['summary'] = df['summary'].apply(lambda x : 'sostok '+ x + ' eostok')
+
+from sklearn.model_selection import train_test_split
+x_tr,x_val,y_tr,y_val=train_test_split(np.array(df['text']),np.array(df['summary']),test_size=0.1,random_state=0,shuffle=True)
+
+
+from keras.preprocessing.text import Tokenizer
+from keras.preprocessing.sequence import pad_sequences
+
+#prepare a tokenizer for reviews on training data
+x_tokenizer = Tokenizer()
+x_tokenizer.fit_on_texts(list(x_tr))
+
+thresh = 4
+
+cnt = 0
+tot_cnt = 0
+freq = 0
+tot_freq = 0
+
+for key, value in x_tokenizer.word_counts.items():
+ tot_cnt = tot_cnt + 1
+ tot_freq = tot_freq + value
+ if (value < thresh):
+ cnt = cnt + 1
+ freq = freq + value
+
+print("% of rare words in vocabulary:", (cnt / tot_cnt) * 100)
+print("Total Coverage of rare words:", (freq / tot_freq) * 100)
+
+#refit the review tokenizer, keeping only the most frequent words (those above the rarity threshold)
+x_tokenizer = Tokenizer(num_words=tot_cnt-cnt)
+x_tokenizer.fit_on_texts(list(x_tr))
+
+#convert text sequences into integer sequences
+x_tr_seq = x_tokenizer.texts_to_sequences(x_tr)
+x_val_seq = x_tokenizer.texts_to_sequences(x_val)
+
+#pad with zeros up to the maximum length
+x_tr = pad_sequences(x_tr_seq, maxlen=max_text_len, padding='post')
+x_val = pad_sequences(x_val_seq, maxlen=max_text_len, padding='post')
+
+#size of vocabulary ( +1 for padding token)
+x_voc = x_tokenizer.num_words + 1
+
+print(x_voc)
+
+#prepare a tokenizer for summaries on training data
+y_tokenizer = Tokenizer()
+y_tokenizer.fit_on_texts(list(y_tr))
+
+thresh = 6
+
+cnt = 0
+tot_cnt = 0
+freq = 0
+tot_freq = 0
+
+for key, value in y_tokenizer.word_counts.items():
+ tot_cnt = tot_cnt + 1
+ tot_freq = tot_freq + value
+ if (value < thresh):
+ cnt = cnt + 1
+ freq = freq + value
+
+print("% of rare words in vocabulary:", (cnt / tot_cnt) * 100)
+print("Total Coverage of rare words:", (freq / tot_freq) * 100)
+
+#refit the summary tokenizer, keeping only the most frequent words
+y_tokenizer = Tokenizer(num_words=tot_cnt-cnt)
+y_tokenizer.fit_on_texts(list(y_tr))
+
+#convert text sequences into integer sequences
+y_tr_seq = y_tokenizer.texts_to_sequences(y_tr)
+y_val_seq = y_tokenizer.texts_to_sequences(y_val)
+
+#pad with zeros up to the maximum length
+y_tr = pad_sequences(y_tr_seq, maxlen=max_summary_len, padding='post')
+y_val = pad_sequences(y_val_seq, maxlen=max_summary_len, padding='post')
+
+#size of vocabulary (+1 for the padding token)
+y_voc = y_tokenizer.num_words + 1
+print(y_tokenizer.word_counts['sostok'], len(y_tr))  # sanity check: one 'sostok' per training summary
+
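+# remove pairs whose summary contains only the start and end tokens (everything
+# else was a rare word dropped by the tokenizer); done for both training and validation splits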
+ind=[]
+for i in range(len(y_tr)):
+ cnt=0
+ for j in y_tr[i]:
+ if j!=0:
+ cnt=cnt+1
+ if(cnt==2):
+ ind.append(i)
+
+y_tr=np.delete(y_tr,ind, axis=0)
+x_tr=np.delete(x_tr,ind, axis=0)
+
+ind=[]
+for i in range(len(y_val)):
+ cnt=0
+ for j in y_val[i]:
+ if j!=0:
+ cnt=cnt+1
+ if(cnt==2):
+ ind.append(i)
+
+y_val=np.delete(y_val,ind, axis=0)
+x_val=np.delete(x_val,ind, axis=0)
\ No newline at end of file
diff --git a/data_manip.py b/data_manip.py
index 10cc1f3..aa8c0a8 100644
--- a/data_manip.py
+++ b/data_manip.py
@@ -1,101 +1,38 @@
import numpy as np
-import re
import pandas as pd
-import numpy as np
-#import tensorflow as tf
import re
+from bs4 import BeautifulSoup
+from keras.preprocessing.text import Tokenizer
+from keras.preprocessing.sequence import pad_sequences
from nltk.corpus import stopwords
-import time
-#from tensorflow.python.layers.core import Dense
-#from tensorflow.python.ops.rnn_cell_impl import _zero_state_tensors
-reviews = pd.read_csv("/home/pbu/Downloads/amazon-fine-food-reviews/Reviews.csv")
-#print(reviews.shape)
-print(reviews.isnull().sum())
-# Remove null values and unneeded features
-reviews = reviews.dropna()
-reviews = reviews.drop(['Id','ProductId','UserId','ProfileName','HelpfulnessNumerator','HelpfulnessDenominator',
- 'Score','Time'], 1)
-reviews = reviews.reset_index(drop=True)
+from keras.layers import Input, LSTM, Embedding, Dense, Concatenate, TimeDistributed
+from keras.models import Model
+from keras.callbacks import ModelCheckpoint
+
+data=pd.read_csv("/home/pbu/Downloads/amazon-fine-food-reviews/Reviews.csv",nrows=100000)
+data.drop_duplicates(subset=['Text'],inplace=True)  # drop duplicate reviews
+data.dropna(axis=0,inplace=True)  # drop rows with missing values
-'''
-for i in range(5):
- print("Review #",i+1)
- print(reviews.Summary[i])
- print(reviews.Text[i])
- print('\n')
-'''
-contractions = {
-"ain't": "am not",
-"aren't": "are not",
-"can't": "cannot",
-"can't've": "cannot have",
-"'cause": "because",
-"could've": "could have",
-"couldn't": "could not",
-"couldn't've": "could not have",
-"didn't": "did not",
-"doesn't": "does not",
-"don't": "do not",
-"hadn't": "had not",
-"hadn't've": "had not have",
-"hasn't": "has not",
-"haven't": "have not",
-"he'd": "he would",
-"he'd've": "he would have",
-"he'll": "he will",
-"he's": "he is",
-"how'd": "how did",
-"how'll": "how will",
-"how's": "how is",
-"i'd": "i would",
-"i'll": "i will",
-"i'm": "i am",
-"i've": "i have",
-"isn't": "is not",
-"it'd": "it would",
-"it'll": "it will",
-"it's": "it is",
-"let's": "let us",
-"ma'am": "madam",
-"mayn't": "may not",
-"might've": "might have",
-"mightn't": "might not",
-"must've": "must have",
-"mustn't": "must not",
-"needn't": "need not",
-"oughtn't": "ought not",
-"shan't": "shall not",
-"sha'n't": "shall not",
-"she'd": "she would",
-"she'll": "she will",
-"she's": "she is",
-"should've": "should have",
-"shouldn't": "should not",
-"that'd": "that would",
-"that's": "that is",
-"there'd": "there had",
-"there's": "there is",
-"they'd": "they would",
-"they'll": "they will",
-"they're": "they are",
-"they've": "they have",
-"wasn't": "was not",
-"we'd": "we would",
-"we'll": "we will",
-"we're": "we are",
-"we've": "we have",
-"weren't": "were not",
-"what'll": "what will",
-"what're": "what are",
-"what's": "what is",
-"what've": "what have",
-"where'd": "where did",
-"where's": "where is",
-"who'll": "who will",
-"who's": "who is",
-"won't": "will not",
-"wouldn't": "would not",
-"you'd": "you would",
-"you'll": "you will",
-"you're": "you are"
-}
+contraction_mapping = {"ain't": "is not", "aren't": "are not","can't": "cannot", "'cause": "because", "could've": "could have", "couldn't": "could not",
+ "didn't": "did not", "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hasn't": "has not", "haven't": "have not",
+ "he'd": "he would","he'll": "he will", "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is",
+ "I'd": "I would", "I'd've": "I would have", "I'll": "I will", "I'll've": "I will have","I'm": "I am", "I've": "I have", "i'd": "i would",
+ "i'd've": "i would have", "i'll": "i will", "i'll've": "i will have","i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would",
+ "it'd've": "it would have", "it'll": "it will", "it'll've": "it will have","it's": "it is", "let's": "let us", "ma'am": "madam",
+ "mayn't": "may not", "might've": "might have","mightn't": "might not","mightn't've": "might not have", "must've": "must have",
+ "mustn't": "must not", "mustn't've": "must not have", "needn't": "need not", "needn't've": "need not have","o'clock": "of the clock",
+ "oughtn't": "ought not", "oughtn't've": "ought not have", "shan't": "shall not", "sha'n't": "shall not", "shan't've": "shall not have",
+ "she'd": "she would", "she'd've": "she would have", "she'll": "she will", "she'll've": "she will have", "she's": "she is",
+ "should've": "should have", "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have","so's": "so as",
+ "this's": "this is","that'd": "that would", "that'd've": "that would have", "that's": "that is", "there'd": "there would",
+ "there'd've": "there would have", "there's": "there is", "here's": "here is","they'd": "they would", "they'd've": "they would have",
+ "they'll": "they will", "they'll've": "they will have", "they're": "they are", "they've": "they have", "to've": "to have",
+ "wasn't": "was not", "we'd": "we would", "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have", "we're": "we are",
+ "we've": "we have", "weren't": "were not", "what'll": "what will", "what'll've": "what will have", "what're": "what are",
+ "what's": "what is", "what've": "what have", "when's": "when is", "when've": "when have", "where'd": "where did", "where's": "where is",
+ "where've": "where have", "who'll": "who will", "who'll've": "who will have", "who's": "who is", "who've": "who have",
+ "why's": "why is", "why've": "why have", "will've": "will have", "won't": "will not", "won't've": "will not have",
+ "would've": "would have", "wouldn't": "would not", "wouldn't've": "would not have", "y'all": "you all",
+ "y'all'd": "you all would","y'all'd've": "you all would have","y'all're": "you all are","y'all've": "you all have",
+ "you'd": "you would", "you'd've": "you would have", "you'll": "you will", "you'll've": "you will have",
+ "you're": "you are", "you've": "you have"}
\ No newline at end of file
diff --git a/inference_model_keras.py b/inference_model_keras.py
new file mode 100644
index 0000000..a6b88e2
--- /dev/null
+++ b/inference_model_keras.py
@@ -0,0 +1,85 @@
+from train_model_keras import *
+
+reverse_target_word_index=y_tokenizer.index_word
+reverse_source_word_index=x_tokenizer.index_word
+target_word_index=y_tokenizer.word_index
+
+# Encode the input sequence to get the feature vector
+encoder_model = Model(inputs=encoder_inputs,outputs=[encoder_outputs, state_h, state_c])
+
+# Decoder setup
+# Below tensors will hold the states of the previous time step
+decoder_state_input_h = Input(shape=(latent_dim,))
+decoder_state_input_c = Input(shape=(latent_dim,))
+decoder_hidden_state_input = Input(shape=(max_text_len,latent_dim))
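+# The encoder output sequence is accepted (and fed at predict time) so the
+# interface matches an attention-based decoder, but the plain LSTM decoder below never reads it.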
+
+# Get the embeddings of the decoder sequence
+dec_emb2= dec_emb_layer(decoder_inputs)
+# To predict the next word in the sequence, set the initial states to the states from the previous time step
+decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=[decoder_state_input_h, decoder_state_input_c])
+
+
+
+# A dense softmax layer to generate prob dist. over the target vocabulary
+decoder_outputs2 = decoder_dense(decoder_outputs2)
+
+# Final decoder model
+decoder_model = Model(
+ [decoder_inputs] + [decoder_hidden_state_input,decoder_state_input_h, decoder_state_input_c],
+ [decoder_outputs2] + [state_h2, state_c2])
+
+
+def decode_sequence(input_seq):
+ # Encode the input as state vectors.
+ e_out, e_h, e_c = encoder_model.predict(input_seq)
+
+ # Generate empty target sequence of length 1.
+ target_seq = np.zeros((1, 1))
+
+ # Populate the first word of target sequence with the start word.
+ target_seq[0, 0] = target_word_index['sostok']
+
+ stop_condition = False
+ decoded_sentence = ''
+ while not stop_condition:
+
+ output_tokens, h, c = decoder_model.predict([target_seq] + [e_out, e_h, e_c])
+
+ # Sample a token
+ sampled_token_index = np.argmax(output_tokens[0, -1, :])
+ sampled_token = reverse_target_word_index[sampled_token_index]
+
+ if (sampled_token != 'eostok'):
+ decoded_sentence += ' ' + sampled_token
+
+ # Exit condition: either hit max length or find stop word.
+ if (sampled_token == 'eostok' or len(decoded_sentence.split()) >= (max_summary_len - 1)):
+ stop_condition = True
+
+ # Update the target sequence (of length 1).
+ target_seq = np.zeros((1, 1))
+ target_seq[0, 0] = sampled_token_index
+
+ # Update internal states
+ e_h, e_c = h, c
+
+ return decoded_sentence
+
+def seq2summary(input_seq):
+ newString=''
+ for i in input_seq:
+ if((i!=0 and i!=target_word_index['sostok']) and i!=target_word_index['eostok']):
+ newString=newString+reverse_target_word_index[i]+' '
+ return newString
+
+def seq2text(input_seq):
+ newString=''
+ for i in input_seq:
+ if(i!=0):
+ newString=newString+reverse_source_word_index[i]+' '
+ return newString
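+
+# print a few sample predictions from the training set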
+for i in range(0,5):
+ print("Review:",seq2text(x_tr[i]))
+ print("Original summary:",seq2summary(y_tr[i]))
+ print("Predicted summary:",decode_sequence(x_tr[i].reshape(1,max_text_len)))
+ print("\n")
\ No newline at end of file
diff --git a/seq2seq_keras_sp.h5 b/seq2seq_keras_sp.h5
new file mode 100644
index 0000000..ce0af92
Binary files /dev/null and b/seq2seq_keras_sp.h5 differ
diff --git a/train_model_keras.py b/train_model_keras.py
index 93686da..f3d5d3b 100644
--- a/train_model_keras.py
+++ b/train_model_keras.py
@@ -1,17 +1,13 @@
-#training the model in keras
from build_model_keras import *
-start = 200000
-end = start + 50000
-sorted_summaries_short = sorted_summaries[start:end]
-sorted_texts_short = sorted_texts[start:end]
-print("The shortest text length:", len(sorted_texts_short[0]))
-print("The longest text length:",len(sorted_texts_short[-1]))
-embeddings=word_embedding_matrix
-enc_embed_input=tf.nn.embedding_lookup(embeddings,padded_text)
-dec_embed_input=tf.nn.embedding_lookup(embeddings,padded_summaries)
-n_units=100
-model,encoder_model,decoder_model=define_model(max_text_length,max_summary_length,n_units)
+model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')
+checkpointer = ModelCheckpoint(filepath='seq2seq_keras_sp.h5', verbose=1, save_best_only=True)
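+# teacher forcing: the decoder is fed y[:, :-1] and trained to predict y[:, 1:],
+# i.e. the same summary shifted one step ahead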
+history = model.fit([x_tr, y_tr[:, :-1]], y_tr.reshape(y_tr.shape[0], y_tr.shape[1], 1)[:, 1:], epochs=50,
+                    callbacks=[checkpointer], batch_size=128,
+                    validation_data=([x_val, y_val[:, :-1]], y_val.reshape(y_val.shape[0], y_val.shape[1], 1)[:, 1:]))
-model.compile(optimizer='rmsprop',loss='categorical_crossentropy')
+from matplotlib import pyplot
+pyplot.plot(history.history['loss'], label='train')
+pyplot.plot(history.history['val_loss'], label='validation')
+pyplot.legend()
+pyplot.show()
\ No newline at end of file