TensorFlow seq2seq.py interface examples

Using a simple English question-answering task as the example, this post tests several of the seq2seq interfaces defined in the seq2seq.py file of TensorFlow 1.4's tf.contrib.legacy_seq2seq.
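The scripts below import these functions from a local copy of seq2seq.py kept in the repo; with TensorFlow 1.4 the same interfaces should also be importable straight from the contrib module:

from tensorflow.contrib.legacy_seq2seq import (
    basic_rnn_seq2seq,
    tied_rnn_seq2seq,
    embedding_rnn_seq2seq,
    embedding_attention_seq2seq,
)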

GitHub: https://github.com/buyizhiyou/tf_seq2seq
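All four test scripts also import build_vocab, sent2idx, idx2sent and show_loss from utils.py in that repo. The repo's implementation is not reproduced here; the following is only a minimal sketch of the behavior the scripts appear to rely on (a hypothetical stand-in, not the original code):

# Hypothetical stand-in for utils.py: a sketch of the assumed behavior, not the repo's code.
import re
import matplotlib.pyplot as plt


def tokenizer(sentence):
    # naive tokenization: words and individual punctuation marks
    return re.findall(r"\w+|[^\s\w]", sentence)


def build_vocab(sentences, is_target=False):
    # returns (word2idx, idx2word, number of distinct tokens)
    words = sorted({tok for sent in sentences for tok in tokenizer(sent)})
    # index 0 is reserved for padding; target vocabularies also reserve 1 for a GO symbol,
    # which is why the scripts size the embeddings as enc_vocab_size+1 and dec_vocab_size+2
    offset = 2 if is_target else 1
    vocab = {word: idx + offset for idx, word in enumerate(words)}
    reverse_vocab = {idx: word for word, idx in vocab.items()}
    return vocab, reverse_vocab, len(words)


def sent2idx(sent, vocab, max_sentence_length, is_target=False):
    tokens = tokenizer(sent)
    padding = [0] * (max_sentence_length - len(tokens))
    if is_target:
        # decoder input: GO symbol (index 1) + token ids + padding -> max_sentence_length + 1 ids
        return [1] + [vocab[t] for t in tokens] + padding
    # encoder input: token ids + padding, plus the true sentence length
    return [vocab[t] for t in tokens] + padding, len(tokens)


def idx2sent(indices, reverse_vocab):
    return ' '.join(reverse_vocab.get(int(i), '') for i in indices)


def show_loss(loss_history):
    plt.plot(loss_history)
    plt.xlabel('step')
    plt.ylabel('loss')
    plt.show()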

Testing the use of basic_rnn_seq2seq

# -*- coding: utf-8 -*-

__author__ = "buyizhiyou"
__date__ = "2018-7-30"


import os
import pdb
import re
from collections import Counter
import matplotlib.pyplot as plt

import tensorflow as tf
from seq2seq import basic_rnn_seq2seq

from utils import *

os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # use GPU 1

input_batches = [
    ['Hi What is your name?', 'Nice to meet you!'],
    ['Which programming language do you use?', 'See you later.'],
    ['Where do you live?', 'What is your major?'],
    ['What do you want to drink?', 'What is your favorite beer?']]

target_batches = [
    ['Hi this is Jaemin.', 'Nice to meet you too!'],
    ['I like Python.', 'Bye Bye.'],
    ['I live in Seoul, South Korea.', 'I study industrial engineering.'],
    ['Beer please!', 'Leffe brown!']]


all_input_sentences = []
for input_batch in input_batches:
    all_input_sentences.extend(input_batch)
all_target_sentences = []
for target_batch in target_batches:
    all_target_sentences.extend(target_batch)

# enc_vocab: word2idx, enc_reverse_vocab: idx2word, enc_vocab_size: 26
enc_vocab, enc_reverse_vocab, enc_vocab_size = build_vocab(all_input_sentences)
# dec_vocab: word2idx, dec_reverse_vocab: idx2word, dec_vocab_size: 28
dec_vocab, dec_reverse_vocab, dec_vocab_size = build_vocab(all_target_sentences, is_target=True)

# hyperparameters
n_epoch = 2000
hidden_size = 50
enc_emb_size = 20
dec_emb_size = 21
enc_sentence_length = 10
dec_sentence_length = 11


enc_inputs = tf.placeholder(tf.int32, shape=[None, enc_sentence_length], name='input_sentences')
sequence_lengths = tf.placeholder(tf.int32, shape=[None], name='sentences_length')
dec_inputs = tf.placeholder(tf.int32, shape=[None, dec_sentence_length + 1], name='output_sentences')

# time-major: [sentence_length x batch_size]
enc_inputs_t = tf.transpose(enc_inputs, perm=[1, 0])
dec_inputs_t = tf.transpose(dec_inputs, perm=[1, 0])

# word embeddings
enc_Wemb = tf.get_variable('enc_word_emb', initializer=tf.random_uniform([enc_vocab_size + 1, enc_emb_size]))
dec_Wemb = tf.get_variable('dec_word_emb', initializer=tf.random_uniform([dec_vocab_size + 2, dec_emb_size]))
enc_emb_inputs = tf.nn.embedding_lookup(enc_Wemb, enc_inputs_t)
dec_emb_inputs = tf.nn.embedding_lookup(dec_Wemb, dec_inputs_t)
# enc_emb_inputs: list (enc_sentence_length) of tensors [batch_size x embedding_size],
# because `static_rnn` takes a list of inputs
enc_emb_inputs = tf.unstack(enc_emb_inputs)
dec_emb_inputs = tf.unstack(dec_emb_inputs)

cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
dec_outputs, state = basic_rnn_seq2seq(enc_emb_inputs, dec_emb_inputs, cell)
dec_outputs = tf.stack(dec_outputs)
logits = tf.layers.dense(dec_outputs, units=dec_vocab_size + 2, activation=tf.nn.relu)  # fully-connected output layer
predictions = tf.argmax(logits, axis=2)
predictions = tf.transpose(predictions, [1, 0])
# labels & logits: [dec_sentence_length+1 x batch_size x dec_vocab_size+2]
labels = tf.one_hot(dec_inputs_t, dec_vocab_size + 2)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    labels=labels, logits=logits))

# training_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)
training_op = tf.train.RMSPropOptimizer(learning_rate=0.0001).minimize(loss)


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    loss_history = []
    for epoch in range(n_epoch):
        all_preds = []
        epoch_loss = 0
        for input_batch, target_batch in zip(input_batches, target_batches):
            input_token_indices = []
            target_token_indices = []
            sentence_lengths = []

            for input_sent in input_batch:
                input_sent, sent_len = sent2idx(input_sent, vocab=enc_vocab, max_sentence_length=enc_sentence_length)
                input_token_indices.append(input_sent)
                sentence_lengths.append(sent_len)

            for target_sent in target_batch:
                target_token_indices.append(sent2idx(target_sent, vocab=dec_vocab, max_sentence_length=dec_sentence_length, is_target=True))

            batch_preds, batch_loss, _ = sess.run(
                [predictions, loss, training_op],
                feed_dict={
                    enc_inputs: input_token_indices,
                    sequence_lengths: sentence_lengths,
                    dec_inputs: target_token_indices
                })
            loss_history.append(batch_loss)
            epoch_loss += batch_loss
            all_preds.append(batch_preds)

        # Logging every 400 epochs
        if epoch % 400 == 0:
            print('Epoch', epoch)
            for input_batch, target_batch, batch_preds in zip(input_batches, target_batches, all_preds):
                for input_sent, target_sent, pred in zip(input_batch, target_batch, batch_preds):
                    print('\t', input_sent)
                    print('\t => ', idx2sent(pred, reverse_vocab=dec_reverse_vocab))
                    print('\tCorrect answer:', target_sent)
            print('\tepoch loss: {:.2f}\n'.format(epoch_loss))

show_loss(loss_history)
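For reference, basic_rnn_seq2seq itself is a thin wrapper: it runs the encoder with static_rnn and hands the final encoder state to rnn_decoder as the initial decoder state. Roughly equivalent, as a sketch only (it would need its own variable scope and is not a drop-in replacement for the call above):

# Rough equivalent of basic_rnn_seq2seq(enc_emb_inputs, dec_emb_inputs, cell), sketch only
_, enc_state = tf.nn.static_rnn(cell, enc_emb_inputs, dtype=tf.float32)
dec_outputs, state = tf.contrib.legacy_seq2seq.rnn_decoder(dec_emb_inputs, enc_state, cell)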

Testing the use of tied_rnn_seq2seq (this interface shares parameters between the encoder and the decoder)

# -*- coding: utf-8 -*-

__author__ = "buyizhiyou"
__date__ = "2018-7-30"


import os
import pdb
import re
from collections import Counter
import matplotlib.pyplot as plt

import tensorflow as tf
from seq2seq import tied_rnn_seq2seq

from utils import *

os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # use GPU 1

input_batches = [
    ['Hi What is your name?', 'Nice to meet you!'],
    ['Which programming language do you use?', 'See you later.'],
    ['Where do you live?', 'What is your major?'],
    ['What do you want to drink?', 'What is your favorite beer?']]

target_batches = [
    ['Hi this is Jaemin.', 'Nice to meet you too!'],
    ['I like Python.', 'Bye Bye.'],
    ['I live in Seoul, South Korea.', 'I study industrial engineering.'],
    ['Beer please!', 'Leffe brown!']]


all_input_sentences = []
for input_batch in input_batches:
    all_input_sentences.extend(input_batch)
all_target_sentences = []
for target_batch in target_batches:
    all_target_sentences.extend(target_batch)

# enc_vocab: word2idx, enc_reverse_vocab: idx2word, enc_vocab_size: 26
enc_vocab, enc_reverse_vocab, enc_vocab_size = build_vocab(all_input_sentences)
# dec_vocab: word2idx, dec_reverse_vocab: idx2word, dec_vocab_size: 28
dec_vocab, dec_reverse_vocab, dec_vocab_size = build_vocab(all_target_sentences, is_target=True)

# hyperparameters
n_epoch = 2000
hidden_size = 50
enc_emb_size = 20
dec_emb_size = 20  # must match enc_emb_size because the encoder and decoder share parameters
enc_sentence_length = 10
dec_sentence_length = 11


enc_inputs = tf.placeholder(tf.int32, shape=[None, enc_sentence_length], name='input_sentences')
sequence_lengths = tf.placeholder(tf.int32, shape=[None], name='sentences_length')
dec_inputs = tf.placeholder(tf.int32, shape=[None, dec_sentence_length + 1], name='output_sentences')

# time-major: [sentence_length x batch_size]
enc_inputs_t = tf.transpose(enc_inputs, perm=[1, 0])
dec_inputs_t = tf.transpose(dec_inputs, perm=[1, 0])

# word embeddings
enc_Wemb = tf.get_variable('enc_word_emb', initializer=tf.random_uniform([enc_vocab_size + 1, enc_emb_size]))
dec_Wemb = tf.get_variable('dec_word_emb', initializer=tf.random_uniform([dec_vocab_size + 2, dec_emb_size]))
enc_emb_inputs = tf.nn.embedding_lookup(enc_Wemb, enc_inputs_t)
dec_emb_inputs = tf.nn.embedding_lookup(dec_Wemb, dec_inputs_t)
# enc_emb_inputs: list (enc_sentence_length) of tensors [batch_size x embedding_size],
# because `static_rnn` takes a list of inputs
enc_emb_inputs = tf.unstack(enc_emb_inputs)
dec_emb_inputs = tf.unstack(dec_emb_inputs)

cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
dec_outputs, state = tied_rnn_seq2seq(enc_emb_inputs, dec_emb_inputs, cell)
dec_outputs = tf.stack(dec_outputs)
logits = tf.layers.dense(dec_outputs, units=dec_vocab_size + 2, activation=tf.nn.relu)  # fully-connected output layer
predictions = tf.argmax(logits, axis=2)
predictions = tf.transpose(predictions, [1, 0])
# labels & logits: [dec_sentence_length+1 x batch_size x dec_vocab_size+2]
labels = tf.one_hot(dec_inputs_t, dec_vocab_size + 2)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    labels=labels, logits=logits))

# training_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)
training_op = tf.train.RMSPropOptimizer(learning_rate=0.0001).minimize(loss)


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    loss_history = []
    for epoch in range(n_epoch):
        all_preds = []
        epoch_loss = 0
        for input_batch, target_batch in zip(input_batches, target_batches):
            input_token_indices = []
            target_token_indices = []
            sentence_lengths = []

            for input_sent in input_batch:
                input_sent, sent_len = sent2idx(input_sent, vocab=enc_vocab, max_sentence_length=enc_sentence_length)
                input_token_indices.append(input_sent)
                sentence_lengths.append(sent_len)

            for target_sent in target_batch:
                target_token_indices.append(sent2idx(target_sent, vocab=dec_vocab, max_sentence_length=dec_sentence_length, is_target=True))

            batch_preds, batch_loss, _ = sess.run(
                [predictions, loss, training_op],
                feed_dict={
                    enc_inputs: input_token_indices,
                    sequence_lengths: sentence_lengths,
                    dec_inputs: target_token_indices
                })
            loss_history.append(batch_loss)
            epoch_loss += batch_loss
            all_preds.append(batch_preds)

        # Logging every 400 epochs
        if epoch % 400 == 0:
            print('Epoch', epoch)
            for input_batch, target_batch, batch_preds in zip(input_batches, target_batches, all_preds):
                for input_sent, target_sent, pred in zip(input_batch, target_batch, batch_preds):
                    print('\t', input_sent)
                    print('\t => ', idx2sent(pred, reverse_vocab=dec_reverse_vocab))
                    print('\tCorrect answer:', target_sent)
            print('\tepoch loss: {:.2f}\n'.format(epoch_loss))

show_loss(loss_history)
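The only structural change from the previous script is the call to tied_rnn_seq2seq, which reuses one set of RNN weights for both the encoder and the decoder; that is also why dec_emb_size must match enc_emb_size here. A quick sanity check, as a sketch, is to list the RNN variables after building each graph: with basic_rnn_seq2seq the encoder and decoder kernels show up separately, while with tied_rnn_seq2seq there is a single shared kernel.

# Sketch: run after graph construction to inspect which RNN weights actually exist
for v in tf.trainable_variables():
    if 'rnn' in v.name.lower():
        print(v.name, v.shape)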

Testing the use of embedding_attention_seq2seq

# -*- coding: utf-8 -*-

__author__ = "buyizhiyou"
__date__ = "2018-7-30"

import os
import pdb
import re
from collections import Counter
import matplotlib.pyplot as plt

import tensorflow as tf
from seq2seq import embedding_attention_seq2seq

from utils import *

os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # use GPU 1

input_batches = [
    ['Hi What is your name?', 'Nice to meet you!'],
    ['Which programming language do you use?', 'See you later.'],
    ['Where do you live?', 'What is your major?'],
    ['What do you want to drink?', 'What is your favorite beer?']]

target_batches = [
    ['Hi this is Jaemin.', 'Nice to meet you too!'],
    ['I like Python.', 'Bye Bye.'],
    ['I live in Seoul, South Korea.', 'I study industrial engineering.'],
    ['Beer please!', 'Leffe brown!']]


all_input_sentences = []
for input_batch in input_batches:
    all_input_sentences.extend(input_batch)
all_target_sentences = []
for target_batch in target_batches:
    all_target_sentences.extend(target_batch)

# enc_vocab: word2idx, enc_reverse_vocab: idx2word, enc_vocab_size: 26
enc_vocab, enc_reverse_vocab, enc_vocab_size = build_vocab(all_input_sentences)
# dec_vocab: word2idx, dec_reverse_vocab: idx2word, dec_vocab_size: 28
dec_vocab, dec_reverse_vocab, dec_vocab_size = build_vocab(all_target_sentences, is_target=True)

# hyperparameters
n_epoch = 2000
hidden_size = 50
enc_emb_size = 20
dec_emb_size = 21
enc_sentence_length = 10
dec_sentence_length = 11


enc_inputs = tf.placeholder(tf.int32, shape=[None, enc_sentence_length], name='input_sentences')
sequence_lengths = tf.placeholder(tf.int32, shape=[None], name='sentences_length')
dec_inputs = tf.placeholder(tf.int32, shape=[None, dec_sentence_length + 1], name='output_sentences')

# time-major: [sentence_length x batch_size]
enc_inputs_t = tf.transpose(enc_inputs, perm=[1, 0])
dec_inputs_t = tf.transpose(dec_inputs, perm=[1, 0])
labels = tf.one_hot(dec_inputs_t, dec_vocab_size + 2)
# labels & logits: [dec_sentence_length+1 x batch_size x dec_vocab_size+2]

# embedding_attention_seq2seq does the embedding lookup internally; like the other
# `static_rnn`-style interfaces it expects lists of per-step tensors, so unstack the time-major ids
enc_inputs_t = tf.unstack(enc_inputs_t)
dec_inputs_t = tf.unstack(dec_inputs_t)

cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
dec_outputs, state = embedding_attention_seq2seq(
                            encoder_inputs=enc_inputs_t,
                            decoder_inputs=dec_inputs_t,
                            cell=cell,
                            num_encoder_symbols=enc_vocab_size + 1,
                            num_decoder_symbols=dec_vocab_size + 2,
                            embedding_size=enc_emb_size,
                            output_projection=None,
                            feed_previous=True
)
logits = tf.stack(dec_outputs)
predictions = tf.argmax(logits, axis=2)
predictions = tf.transpose(predictions, [1, 0])

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    labels=labels, logits=logits))
# training_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)
training_op = tf.train.RMSPropOptimizer(learning_rate=0.0001).minimize(loss)


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    loss_history = []
    for epoch in range(n_epoch):
        all_preds = []
        epoch_loss = 0
        for input_batch, target_batch in zip(input_batches, target_batches):
            input_token_indices = []
            target_token_indices = []
            sentence_lengths = []

            for input_sent in input_batch:
                input_sent, sent_len = sent2idx(input_sent, vocab=enc_vocab, max_sentence_length=enc_sentence_length)
                input_token_indices.append(input_sent)
                sentence_lengths.append(sent_len)

            for target_sent in target_batch:
                target_token_indices.append(sent2idx(target_sent, vocab=dec_vocab, max_sentence_length=dec_sentence_length, is_target=True))

            batch_preds, batch_loss, _ = sess.run(
                [predictions, loss, training_op],
                feed_dict={
                    enc_inputs: input_token_indices,
                    sequence_lengths: sentence_lengths,
                    dec_inputs: target_token_indices
                })
            loss_history.append(batch_loss)
            epoch_loss += batch_loss
            all_preds.append(batch_preds)

        # Logging every 400 epochs
        if epoch % 400 == 0:
            print('Epoch', epoch)
            for input_batch, target_batch, batch_preds in zip(input_batches, target_batches, all_preds):
                for input_sent, target_sent, pred in zip(input_batch, target_batch, batch_preds):
                    print('\t', input_sent)
                    print('\t => ', idx2sent(pred, reverse_vocab=dec_reverse_vocab))
                    print('\tCorrect answer:', target_sent)
            print('\tepoch loss: {:.2f}\n'.format(epoch_loss))

show_loss(loss_history)
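Note that feed_previous=True means the decoder only consumes the first symbol of dec_inputs and then feeds the embedding of its own previous argmax back in at every step, even during training; with feed_previous=False it would use the ground-truth decoder inputs (teacher forcing). The argument may also be a scalar boolean tensor, so the same graph can switch between the two modes at run time. A sketch of that variant (use_previous is my own name, and the snippet only illustrates the call signature, it is not meant to be pasted on top of the graph above):

# Sketch: drive feed_previous from a placeholder to switch between teacher forcing and
# feeding back the decoder's own predictions at run time.
use_previous = tf.placeholder_with_default(False, shape=[], name='feed_previous')
dec_outputs, state = embedding_attention_seq2seq(
    encoder_inputs=enc_inputs_t,
    decoder_inputs=dec_inputs_t,
    cell=cell,
    num_encoder_symbols=enc_vocab_size + 1,
    num_decoder_symbols=dec_vocab_size + 2,
    embedding_size=enc_emb_size,
    feed_previous=use_previous)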

Testing the use of embedding_rnn_seq2seq

# -*- coding: utf-8 -*-

__author__ = "buyizhiyou"
__date__ = "2018-7-30"


'''
Test the embedding_rnn_seq2seq function
'''

import os
import pdb
import re
from collections import Counter
import matplotlib.pyplot as plt

import tensorflow as tf
from seq2seq import embedding_rnn_seq2seq

from utils import *

os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # use GPU 1

input_batches = [
    ['Hi What is your name?', 'Nice to meet you!'],
    ['Which programming language do you use?', 'See you later.'],
    ['Where do you live?', 'What is your major?'],
    ['What do you want to drink?', 'What is your favorite beer?']]

target_batches = [
    ['Hi this is Jaemin.', 'Nice to meet you too!'],
    ['I like Python.', 'Bye Bye.'],
    ['I live in Seoul, South Korea.', 'I study industrial engineering.'],
    ['Beer please!', 'Leffe brown!']]


all_input_sentences = []
for input_batch in input_batches:
    all_input_sentences.extend(input_batch)
all_target_sentences = []
for target_batch in target_batches:
    all_target_sentences.extend(target_batch)

# enc_vocab: word2idx, enc_reverse_vocab: idx2word, enc_vocab_size: 26
enc_vocab, enc_reverse_vocab, enc_vocab_size = build_vocab(all_input_sentences)
# dec_vocab: word2idx, dec_reverse_vocab: idx2word, dec_vocab_size: 28
dec_vocab, dec_reverse_vocab, dec_vocab_size = build_vocab(all_target_sentences, is_target=True)

# hyperparameters
n_epoch = 2000
hidden_size = 50
enc_emb_size = 20
dec_emb_size = 21
enc_sentence_length = 10
dec_sentence_length = 11


enc_inputs = tf.placeholder(tf.int32, shape=[None, enc_sentence_length], name='input_sentences')
sequence_lengths = tf.placeholder(tf.int32, shape=[None], name='sentences_length')
dec_inputs = tf.placeholder(tf.int32, shape=[None, dec_sentence_length + 1], name='output_sentences')

# time-major: [sentence_length x batch_size]
enc_inputs_t = tf.transpose(enc_inputs, perm=[1, 0])
dec_inputs_t = tf.transpose(dec_inputs, perm=[1, 0])
labels = tf.one_hot(dec_inputs_t, dec_vocab_size + 2)
# labels & logits: [dec_sentence_length+1 x batch_size x dec_vocab_size+2]

# embedding_rnn_seq2seq does the embedding lookup internally; like the other
# `static_rnn`-style interfaces it expects lists of per-step tensors, so unstack the time-major ids
enc_inputs_t = tf.unstack(enc_inputs_t)
dec_inputs_t = tf.unstack(dec_inputs_t)

cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
dec_outputs, state = embedding_rnn_seq2seq(
                            encoder_inputs=enc_inputs_t,
                            decoder_inputs=dec_inputs_t,
                            cell=cell,
                            num_encoder_symbols=enc_vocab_size + 1,
                            num_decoder_symbols=dec_vocab_size + 2,
                            embedding_size=enc_emb_size,
                            output_projection=None,
                            feed_previous=True
)
logits = tf.stack(dec_outputs)
predictions = tf.argmax(logits, axis=2)
predictions = tf.transpose(predictions, [1, 0])

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    labels=labels, logits=logits))
# training_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)
training_op = tf.train.RMSPropOptimizer(learning_rate=0.0001).minimize(loss)


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    loss_history = []
    for epoch in range(n_epoch):
        all_preds = []
        epoch_loss = 0
        for input_batch, target_batch in zip(input_batches, target_batches):
            input_token_indices = []
            target_token_indices = []
            sentence_lengths = []

            for input_sent in input_batch:
                input_sent, sent_len = sent2idx(input_sent, vocab=enc_vocab, max_sentence_length=enc_sentence_length)
                input_token_indices.append(input_sent)
                sentence_lengths.append(sent_len)

            for target_sent in target_batch:
                target_token_indices.append(sent2idx(target_sent, vocab=dec_vocab, max_sentence_length=dec_sentence_length, is_target=True))

            batch_preds, batch_loss, _ = sess.run(
                [predictions, loss, training_op],
                feed_dict={
                    enc_inputs: input_token_indices,
                    sequence_lengths: sentence_lengths,
                    dec_inputs: target_token_indices
                })
            loss_history.append(batch_loss)
            epoch_loss += batch_loss
            all_preds.append(batch_preds)

        # Logging every 400 epochs
        if epoch % 400 == 0:
            print('Epoch', epoch)
            for input_batch, target_batch, batch_preds in zip(input_batches, target_batches, all_preds):
                for input_sent, target_sent, pred in zip(input_batch, target_batch, batch_preds):
                    print('\t', input_sent)
                    print('\t => ', idx2sent(pred, reverse_vocab=dec_reverse_vocab))
                    print('\tCorrect answer:', target_sent)
            print('\tepoch loss: {:.2f}\n'.format(epoch_loss))

show_loss(loss_history)
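All four scripts compute the loss from one-hot labels with softmax_cross_entropy_with_logits. legacy_seq2seq also ships a sequence_loss helper that works directly on the per-step logits list and integer targets, with per-step weights that can mask out padding. A sketch using the names from the last script (the all-ones weights are a simplification of my own; real code would zero out padded positions):

# Sketch: sequence_loss expects lists of per-step tensors, matching the decoder outputs.
from tensorflow.contrib.legacy_seq2seq import sequence_loss

targets = dec_inputs_t   # already a list of [batch_size] int32 tensors after tf.unstack
weights = [tf.ones_like(t, dtype=tf.float32) for t in targets]
loss = sequence_loss(dec_outputs, targets, weights)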