diff --git a/demos/aquarat/.env b/demos/aquarat/.env
index 290a11ba..1a9bb11a 100644
--- a/demos/aquarat/.env
+++ b/demos/aquarat/.env
@@ -1,4 +1,10 @@
-USE_OPENAI_API_KEY="False"
+# MODEL_TYPE choices: openai, ollama, custom, azure
+MODEL_TYPE="ollama"
+
+# When using ollama, only CUSTOM_MODEL_NAME is required
+CUSTOM_API_KEY=""
+CUSTOM_MODEL_NAME=""
+CUSTOM_BASE_URL=""
 
 OPENAI_API_KEY=""
 OPENAI_MODEL_NAME =""
diff --git a/demos/bbh/.env b/demos/bbh/.env
index 290a11ba..1a9bb11a 100644
--- a/demos/bbh/.env
+++ b/demos/bbh/.env
@@ -1,4 +1,10 @@
-USE_OPENAI_API_KEY="False"
+# MODEL_TYPE choices: openai, ollama, custom, azure
+MODEL_TYPE="ollama"
+
+# When using ollama, only CUSTOM_MODEL_NAME is required
+CUSTOM_API_KEY=""
+CUSTOM_MODEL_NAME=""
+CUSTOM_BASE_URL=""
 
 OPENAI_API_KEY=""
 OPENAI_MODEL_NAME =""
diff --git a/demos/gsm8k/.env b/demos/gsm8k/.env
index 290a11ba..1a9bb11a 100644
--- a/demos/gsm8k/.env
+++ b/demos/gsm8k/.env
@@ -1,4 +1,10 @@
-USE_OPENAI_API_KEY="False"
+# MODEL_TYPE choices: openai, ollama, custom, azure
+MODEL_TYPE="ollama"
+
+# When using ollama, only CUSTOM_MODEL_NAME is required
+CUSTOM_API_KEY=""
+CUSTOM_MODEL_NAME=""
+CUSTOM_BASE_URL=""
 
 OPENAI_API_KEY=""
 OPENAI_MODEL_NAME =""
diff --git a/demos/scenarios/.env b/demos/scenarios/.env
index 290a11ba..1a9bb11a 100644
--- a/demos/scenarios/.env
+++ b/demos/scenarios/.env
@@ -1,4 +1,10 @@
-USE_OPENAI_API_KEY="False"
+# MODEL_TYPE choices: openai, ollama, custom, azure
+MODEL_TYPE="ollama"
+
+# When using ollama, only CUSTOM_MODEL_NAME is required
+CUSTOM_API_KEY=""
+CUSTOM_MODEL_NAME=""
+CUSTOM_BASE_URL=""
 
 OPENAI_API_KEY=""
 OPENAI_MODEL_NAME =""
diff --git a/promptwizard/glue/common/llm/llm_mgr.py b/promptwizard/glue/common/llm/llm_mgr.py
index c5cec9cd..357d72af 100644
--- a/promptwizard/glue/common/llm/llm_mgr.py
+++ b/promptwizard/glue/common/llm/llm_mgr.py
@@ -4,45 +4,66 @@ from llama_index.core.llms import LLM
 from tenacity import retry, stop_after_attempt, wait_fixed, wait_random
 from ..base_classes import LLMConfig
-from ..constants.str_literals import InstallLibs, OAILiterals, \
-    OAILiterals, LLMLiterals, LLMOutputTypes
+from ..constants.str_literals import (
+    InstallLibs,
+    OAILiterals,
+    LLMLiterals,
+    LLMOutputTypes,
+)
 from .llm_helper import get_token_counter
 from ..exceptions import GlueLLMException
 from ..utils.runtime_tasks import install_lib_if_missing
 from ..utils.logging import get_glue_logger
 from ..utils.runtime_tasks import str_to_class
 import os
-logger = get_glue_logger(__name__)
+from openai import OpenAI
+from azure.identity import get_bearer_token_provider, AzureCliCredential
+from openai import AzureOpenAI
 
 
-def call_api(messages):
-    from openai import OpenAI
-    from azure.identity import get_bearer_token_provider, AzureCliCredential
-    from openai import AzureOpenAI
+logger = get_glue_logger(__name__)
 
-    if os.environ['USE_OPENAI_API_KEY'] == "True":
-        client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
-        response = client.chat.completions.create(
-            model=os.environ["OPENAI_MODEL_NAME"],
-            messages=messages,
-            temperature=0.0,
+
+def get_client():
+    model_type = os.environ["MODEL_TYPE"].lower()
+    if model_type == "openai":
+        return OpenAI(api_key=os.environ["OPENAI_API_KEY"])
+    elif model_type == "ollama":
+        return OpenAI(api_key="ollama", base_url="http://localhost:11434/v1")
+    elif model_type == "custom":
+        return OpenAI(
+            api_key=os.environ["CUSTOM_API_KEY"], base_url=os.environ["CUSTOM_BASE_URL"]
         )
     else:
         token_provider = get_bearer_token_provider(
-            AzureCliCredential(), "https://cognitiveservices.azure.com/.default"
-            )
-        client = AzureOpenAI(
+            AzureCliCredential(), "https://cognitiveservices.azure.com/.default"
+        )
+        return AzureOpenAI(
             api_version=os.environ["OPENAI_API_VERSION"],
             azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
-            azure_ad_token_provider=token_provider
-        )
-        response = client.chat.completions.create(
-            model=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
-            messages=messages,
-            temperature=0.0,
+            azure_ad_token_provider=token_provider,
         )
 
+
+def get_model():
+    model_type = os.environ["MODEL_TYPE"].lower()
+    if model_type == "openai":
+        return os.environ["OPENAI_MODEL_NAME"]
+    elif model_type in ("ollama", "custom"):
+        return os.environ["CUSTOM_MODEL_NAME"]
+    else:
+        return os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"]
+
+
+# Resolve the configured client and model name, then issue the chat completion
+def call_api(messages):
+    client = get_client()
+    model_name = get_model()
+    response = client.chat.completions.create(
+        model=model_name,
+        messages=messages,
+        temperature=0.0,
+    )
     prediction = response.choices[0].message.content
     return prediction
 
@@ -50,20 +71,18 @@ def call_api(messages):
 
 class LLMMgr:
     @staticmethod
     def chat_completion(messages: Dict):
-        llm_handle = os.environ.get("MODEL_TYPE", "AzureOpenAI")
+        llm_handle = os.environ.get("MODEL_TYPE")
         try:
-            if(llm_handle == "AzureOpenAI"):
-                # Code to for calling LLMs
-                return call_api(messages)
-            elif(llm_handle == "LLamaAML"):
-                # Code to for calling SLMs
+            if llm_handle == "LLamaAML":
                 return 0
+            else:
+                prediction = call_api(messages)
+                return prediction
         except Exception as e:
             print(e)
             return "Sorry, I am not able to understand your query. Please try again."
             # raise GlueLLMException(f"Exception when calling {llm_handle.__class__.__name__} "
             #                        f"LLM in chat mode, with message {messages} ", e)
-
     @staticmethod
     def get_all_model_ids_of_type(llm_config: LLMConfig, llm_output_type: str):
@@ -97,6 +116,7 @@ def get_llm_pool(llm_config: LLMConfig) -> Dict[str, LLM]:
         install_lib_if_missing(InstallLibs.TIKTOKEN)
 
         import tiktoken
+
         # from llama_index.llms.azure_openai import AzureOpenAI
         from openai import AzureOpenAI
         from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
@@ -105,75 +125,85 @@ def get_llm_pool(llm_config: LLMConfig) -> Dict[str, LLM]:
         az_token_provider = None
         # if az_llm_config.use_azure_ad:
         from azure.identity import get_bearer_token_provider, AzureCliCredential
-        az_token_provider = get_bearer_token_provider(AzureCliCredential(),
-                                                      "https://cognitiveservices.azure.com/.default")
+
+        az_token_provider = get_bearer_token_provider(
+            AzureCliCredential(), "https://cognitiveservices.azure.com/.default"
+        )
 
         for azure_oai_model in az_llm_config.azure_oai_models:
             callback_mgr = None
             if azure_oai_model.track_tokens:
-
+
                 # If we need to count number of tokens used in LLM calls
                 token_counter = TokenCountingHandler(
-                    tokenizer=tiktoken.encoding_for_model(azure_oai_model.model_name_in_azure).encode
-                    )
+                    tokenizer=tiktoken.encoding_for_model(
+                        azure_oai_model.model_name_in_azure
+                    ).encode
+                )
                 callback_mgr = CallbackManager([token_counter])
                 token_counter.reset_counts()
                 # ()
-            if azure_oai_model.model_type in [LLMOutputTypes.CHAT, LLMOutputTypes.COMPLETION]:
+            if azure_oai_model.model_type in [
+                LLMOutputTypes.CHAT,
+                LLMOutputTypes.COMPLETION,
+            ]:
                 # ()
-                llm_pool[azure_oai_model.unique_model_id] = \
-                    AzureOpenAI(
-                        # use_azure_ad=az_llm_config.use_azure_ad,
-                        azure_ad_token_provider=az_token_provider,
-                        # model=azure_oai_model.model_name_in_azure,
-                        # deployment_name=azure_oai_model.deployment_name_in_azure,
-                        api_key=az_llm_config.api_key,
-                        azure_endpoint=az_llm_config.azure_endpoint,
-                        api_version=az_llm_config.api_version,
-                        # callback_manager=callback_mgr
-                        )
+                llm_pool[azure_oai_model.unique_model_id] = AzureOpenAI(
+                    # use_azure_ad=az_llm_config.use_azure_ad,
+                    azure_ad_token_provider=az_token_provider,
+                    # model=azure_oai_model.model_name_in_azure,
+                    # deployment_name=azure_oai_model.deployment_name_in_azure,
+                    api_key=az_llm_config.api_key,
+                    azure_endpoint=az_llm_config.azure_endpoint,
+                    api_version=az_llm_config.api_version,
+                    # callback_manager=callback_mgr
+                )
                 # ()
             elif azure_oai_model.model_type == LLMOutputTypes.EMBEDDINGS:
-                llm_pool[azure_oai_model.unique_model_id] =\
-                    AzureOpenAIEmbedding(use_azure_ad=az_llm_config.use_azure_ad,
-                                         azure_ad_token_provider=az_token_provider,
-                                         model=azure_oai_model.model_name_in_azure,
-                                         deployment_name=azure_oai_model.deployment_name_in_azure,
-                                         api_key=az_llm_config.api_key,
-                                         azure_endpoint=az_llm_config.azure_endpoint,
-                                         api_version=az_llm_config.api_version,
-                                         callback_manager=callback_mgr
-                                         )
+                llm_pool[azure_oai_model.unique_model_id] = AzureOpenAIEmbedding(
+                    use_azure_ad=az_llm_config.use_azure_ad,
+                    azure_ad_token_provider=az_token_provider,
+                    model=azure_oai_model.model_name_in_azure,
+                    deployment_name=azure_oai_model.deployment_name_in_azure,
+                    api_key=az_llm_config.api_key,
+                    azure_endpoint=az_llm_config.azure_endpoint,
+                    api_version=az_llm_config.api_version,
+                    callback_manager=callback_mgr,
+                )
             elif azure_oai_model.model_type == LLMOutputTypes.MULTI_MODAL:
-                llm_pool[azure_oai_model.unique_model_id] = \
-                    AzureOpenAIMultiModal(use_azure_ad=az_llm_config.use_azure_ad,
-                                          azure_ad_token_provider=az_token_provider,
-                                          model=azure_oai_model.model_name_in_azure,
-                                          deployment_name=azure_oai_model.deployment_name_in_azure,
-                                          api_key=az_llm_config.api_key,
-                                          azure_endpoint=az_llm_config.azure_endpoint,
-                                          api_version=az_llm_config.api_version,
-                                          max_new_tokens=4096
-                                          )
+                llm_pool[azure_oai_model.unique_model_id] = AzureOpenAIMultiModal(
+                    use_azure_ad=az_llm_config.use_azure_ad,
+                    azure_ad_token_provider=az_token_provider,
+                    model=azure_oai_model.model_name_in_azure,
+                    deployment_name=azure_oai_model.deployment_name_in_azure,
+                    api_key=az_llm_config.api_key,
+                    azure_endpoint=az_llm_config.azure_endpoint,
+                    api_version=az_llm_config.api_version,
+                    max_new_tokens=4096,
+                )
 
         if llm_config.custom_models:
             for custom_model in llm_config.custom_models:
                 # try:
-                custom_llm_class = str_to_class(custom_model.class_name, None, custom_model.path_to_py_file)
+                custom_llm_class = str_to_class(
+                    custom_model.class_name, None, custom_model.path_to_py_file
+                )
 
                 callback_mgr = None
                 if custom_model.track_tokens:
                     # If we need to count number of tokens used in LLM calls
                     token_counter = TokenCountingHandler(
                         tokenizer=custom_llm_class.get_tokenizer()
-                        )
+                    )
                     callback_mgr = CallbackManager([token_counter])
                     token_counter.reset_counts()
-                llm_pool[custom_model.unique_model_id] = custom_llm_class(callback_manager=callback_mgr)
+                llm_pool[custom_model.unique_model_id] = custom_llm_class(
+                    callback_manager=callback_mgr
+                )
                 # except Exception as e:
-                #     raise GlueLLMException(f"Custom model {custom_model.unique_model_id} not loaded.", e)
+                #    raise GlueLLMException(f"Custom model {custom_model.unique_model_id} not loaded.", e)
         return llm_pool
 
     @staticmethod
@@ -190,6 +220,6 @@ def get_tokens_used(llm_handle: LLM) -> Dict[str, int]:
             LLMLiterals.EMBEDDING_TOKEN_COUNT: token_counter.total_embedding_token_count,
             LLMLiterals.PROMPT_LLM_TOKEN_COUNT: token_counter.prompt_llm_token_count,
             LLMLiterals.COMPLETION_LLM_TOKEN_COUNT: token_counter.completion_llm_token_count,
-            LLMLiterals.TOTAL_LLM_TOKEN_COUNT: token_counter.total_llm_token_count
-            }
+            LLMLiterals.TOTAL_LLM_TOKEN_COUNT: token_counter.total_llm_token_count,
+        }
         return None
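
For reference, a filled-in demo `.env` for the Ollama path could look like the block below. The model name is illustrative; any model already pulled locally (e.g. via `ollama pull`) works, and since `get_client()` hardcodes the API key and base URL for this path, only CUSTOM_MODEL_NAME needs a value:

    # MODEL_TYPE choices: openai, ollama, custom, azure
    MODEL_TYPE="ollama"

    # When using ollama, only CUSTOM_MODEL_NAME is required
    CUSTOM_API_KEY=""
    CUSTOM_MODEL_NAME="llama3"
    CUSTOM_BASE_URL=""

    OPENAI_API_KEY=""
    OPENAI_MODEL_NAME =""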
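
And a minimal smoke test for the new routing — a sketch, assuming PromptWizard and its dependencies are installed (note that `azure.identity` is now imported at module level, so it is needed even for the Ollama path), an Ollama server is listening on its default port 11434, and the illustrative `llama3` model has been pulled:

    import os

    # Route call_api() through the local Ollama server (OpenAI-compatible /v1 API)
    os.environ["MODEL_TYPE"] = "ollama"
    os.environ["CUSTOM_MODEL_NAME"] = "llama3"  # illustrative; any pulled model works

    from promptwizard.glue.common.llm.llm_mgr import call_api

    # call_api() builds the client via get_client()/get_model() and runs at temperature 0.0
    print(call_api([{"role": "user", "content": "Reply with one word: ok"}]))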