diff --git a/senju/image_reco.py b/senju/image_reco.py index 272e985..e0f4b30 100644 --- a/senju/image_reco.py +++ b/senju/image_reco.py @@ -2,10 +2,13 @@ Senju Image Recognition Module ============================= -A module providing image description generation capabilities for the Senju haiku application. +A module providing image description generation capabilities for the Senju +haiku application. -This module leverages pre-trained vision-language models (specifically BLIP) to generate -textual descriptions of uploaded images. These descriptions can then be used as input +This module leverages pre-trained vision-language models (specifically BLIP) +to generate +textual descriptions of uploaded images. These descriptions can then be +used as input for the haiku generation process, enabling image-to-haiku functionality. Classes @@ -43,11 +46,13 @@ from transformers import BlipProcessor, BlipForConditionalGeneration class ImageDescriptionGenerator: """ - A class for generating textual descriptions of images using a vision-language model. + A class for generating textual descriptions of images using + a vision-language model. - This class handles the loading of a pre-trained BLIP model, image preprocessing, - and caption generation. It provides an interface for converting raw image data - into natural language descriptions that can be used for haiku inspiration. + This class handles the loading of a pre-trained BLIP model, image + preprocessing, and caption generation. It provides an interface for + converting raw image data into natural language descriptions that can + be used for haiku inspiration. :ivar processor: The BLIP processor for handling image inputs :type processor: BlipProcessor @@ -75,14 +80,16 @@ class ImageDescriptionGenerator: """ Generate a descriptive caption for the given image. - This method processes the raw image data, runs inference with the BLIP model, - and returns a structured response with the generated description. + This method processes the raw image data, runs inference with + the BLIP model, and returns a structured response with the + generated description. :param image_data: Raw binary image data :type image_data: bytes :param max_length: Maximum token length for the generated caption :type max_length: int - :return: Dictionary containing the generated description and confidence score + :return: Dictionary containing the generated description and + confidence score :rtype: dict """ # Convert uploaded bytes to image @@ -120,12 +127,13 @@ def gen_response(image_data) -> dict: """ Generate a description for an image using the global description generator. - This function provides a simplified interface to the image description functionality - for use in API endpoints. + This function provides a simplified interface to the image + description functionality for use in API endpoints. :param image_data: Raw binary image data :type image_data: bytes - :return: Dictionary containing the image description and confidence information + :return: Dictionary containing the image description and + confidence information :rtype: dict :raises Exception: If image processing or description generation fails """