import pickle

import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Build the CNN encoder used to turn images into feature vectors.
def load_cnn_model():
    """Return InceptionV3 (ImageNet weights) truncated before its classifier.

    The model's output is the penultimate layer's activations, which serve
    as a fixed-length image feature vector for the captioning decoder.
    """
    backbone = InceptionV3(weights='imagenet')
    # Drop the final softmax classification layer; keep everything up to
    # the layer just before it as the feature extractor.
    return Model(backbone.input, backbone.layers[-2].output)
# Encode one image file into a CNN feature vector.
def extract_image_features(cnn_model, img_path):
    """Load an image from disk and return its CNN feature vector.

    Args:
        cnn_model: feature-extraction model from ``load_cnn_model``.
        img_path: path to an image file on disk.

    Returns:
        The model's prediction for a single-image batch.
    """
    # InceptionV3 expects 299x299 RGB input.
    pil_img = image.load_img(img_path, target_size=(299, 299))
    # Add a leading batch axis, then apply Inception's preprocessing.
    batch = image.img_to_array(pil_img)[np.newaxis, ...]
    batch = preprocess_input(batch)
    return cnn_model.predict(batch)
# Load tokenizer and pre-trained captioning model
def load_captioning_model(tokenizer_path, captioning_model_path):
    """Load the fitted tokenizer and the trained captioning model.

    Args:
        tokenizer_path: path to a pickled Keras ``Tokenizer`` (e.g. ``tokenizer.pkl``).
        captioning_model_path: path to a saved Keras model (e.g. ``.h5``).

    Returns:
        Tuple of ``(tokenizer, captioning_model)``.
    """
    # BUG FIX: a Keras Tokenizer is not a Keras model and is saved with
    # pickle (main() passes 'tokenizer.pkl'); tf.keras.models.load_model
    # would fail on a pickle file. Deserialize it with pickle instead.
    with open(tokenizer_path, 'rb') as f:
        tokenizer = pickle.load(f)
    captioning_model = load_model(captioning_model_path)
    return tokenizer, captioning_model
# Decode a caption, one word at a time, from image features.
def generate_caption(features, tokenizer, captioning_model, max_length):
    """Greedily decode a caption for the given image features.

    Starting from the 'startseq' token, repeatedly predicts the most likely
    next word until 'endseq', an unknown index, or ``max_length`` words.

    Args:
        features: image feature vector from ``extract_image_features``.
        tokenizer: fitted Keras tokenizer (provides ``texts_to_sequences``
            and ``index_word``).
        captioning_model: trained model taking ``[features, sequence]``.
        max_length: maximum caption length (also the padding length).

    Returns:
        The generated caption with the 'startseq' marker removed.
    """
    words = ['startseq']
    for _ in range(max_length):
        encoded = tokenizer.texts_to_sequences([' '.join(words)])[0]
        padded = pad_sequences([encoded], maxlen=max_length)
        # Greedy choice: take the highest-probability word index.
        probabilities = captioning_model.predict([features, padded])
        next_index = np.argmax(probabilities, axis=-1)[0]
        next_word = tokenizer.index_word.get(next_index, '')
        # Stop on the end marker or an index with no known word.
        if not next_word or next_word == 'endseq':
            break
        words.append(next_word)
    # Drop the leading 'startseq' marker before returning.
    return ' '.join(words[1:])
# Script entry point: encode one image and print its generated caption.
def main():
    """Run the full captioning pipeline on a single sample image."""
    # Paths and decoding settings — adjust for your own files.
    img_path = 'sample_image.jpg'  # Provide the path to your sample image
    tokenizer_path = 'tokenizer.pkl'  # Path to the saved tokenizer
    captioning_model_path = 'captioning_model.h5'  # Path to the saved captioning model
    max_length = 20  # Max caption length

    # Build the encoder and extract features for the sample image.
    cnn_model = load_cnn_model()
    features = extract_image_features(cnn_model, img_path)

    # Restore the tokenizer and decoder, then generate the caption.
    tokenizer, captioning_model = load_captioning_model(
        tokenizer_path, captioning_model_path
    )
    caption = generate_caption(features, tokenizer, captioning_model, max_length)
    print("Generated Caption:", caption)


if __name__ == '__main__':
    main()