8 changes: 7 additions & 1 deletion demos/aquarat/.env
@@ -1,4 +1,10 @@
# MODEL_TYPE choices: openai, ollama, custom, azure
MODEL_TYPE="ollama"

# When using ollama, only CUSTOM_MODEL_NAME is required
CUSTOM_API_KEY=""
CUSTOM_MODEL_NAME=""
CUSTOM_BASE_URL=""

OPENAI_API_KEY=""
OPENAI_MODEL_NAME=""
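Aside: this .env block (repeated verbatim in the bbh, gsm8k, and scenarios demos below) is read into the process environment before the glue code runs. A minimal loading sketch, assuming python-dotenv is installed and the demo is launched from its own directory (both assumptions, not shown in this diff):

import os

from dotenv import load_dotenv

load_dotenv()  # reads the demo's .env into os.environ

model_type = os.environ["MODEL_TYPE"].lower()  # "openai", "ollama", "custom", or "azure"
if model_type == "ollama":
    # Per the comment above, only CUSTOM_MODEL_NAME must be set for ollama.
    assert os.environ["CUSTOM_MODEL_NAME"], "Set CUSTOM_MODEL_NAME in .env"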
8 changes: 7 additions & 1 deletion demos/bbh/.env
@@ -1,4 +1,10 @@
# MODEL_TYPE choices: openai, ollama, custom, azure
MODEL_TYPE="ollama"

# When using ollama, only CUSTOM_MODEL_NAME is required
CUSTOM_API_KEY=""
CUSTOM_MODEL_NAME=""
CUSTOM_BASE_URL=""

OPENAI_API_KEY=""
OPENAI_MODEL_NAME=""
8 changes: 7 additions & 1 deletion demos/gsm8k/.env
@@ -1,4 +1,10 @@
# MODEL_TYPE choices: openai, ollama, custom, azure
MODEL_TYPE="ollama"

# When using ollama, only CUSTOM_MODEL_NAME is required
CUSTOM_API_KEY=""
CUSTOM_MODEL_NAME=""
CUSTOM_BASE_URL=""

OPENAI_API_KEY=""
OPENAI_MODEL_NAME=""
8 changes: 7 additions & 1 deletion demos/scenarios/.env
@@ -1,4 +1,10 @@
# MODEL_TYPE choices: openai, ollama, custom, azure
MODEL_TYPE="ollama"

# When using ollama, only CUSTOM_MODEL_NAME is required
CUSTOM_API_KEY=""
CUSTOM_MODEL_NAME=""
CUSTOM_BASE_URL=""

OPENAI_API_KEY=""
OPENAI_MODEL_NAME=""
175 changes: 103 additions & 72 deletions promptwizard/glue/common/llm/llm_mgr.py
@@ -4,66 +4,86 @@
from llama_index.core.llms import LLM
from tenacity import retry, stop_after_attempt, wait_fixed, wait_random
from ..base_classes import LLMConfig
from ..constants.str_literals import (
    InstallLibs,
    OAILiterals,
    LLMLiterals,
    LLMOutputTypes,
)
from .llm_helper import get_token_counter
from ..exceptions import GlueLLMException
from ..utils.runtime_tasks import install_lib_if_missing
from ..utils.logging import get_glue_logger
from ..utils.runtime_tasks import str_to_class
import os
logger = get_glue_logger(__name__)
from openai import OpenAI, AzureOpenAI
from azure.identity import get_bearer_token_provider, AzureCliCredential

def getClient():
    model_type = os.environ["MODEL_TYPE"].lower()
    if model_type == "openai":
        return OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    elif model_type == "ollama":
        # Ollama exposes an OpenAI-compatible API; the api_key is a placeholder.
        return OpenAI(api_key="ollama", base_url="http://localhost:11434/v1")
    elif model_type == "custom":
        return OpenAI(
            api_key=os.environ["CUSTOM_API_KEY"],
            base_url=os.environ["CUSTOM_BASE_URL"],
        )
    else:
        token_provider = get_bearer_token_provider(
            AzureCliCredential(), "https://cognitiveservices.azure.com/.default"
        )
        return AzureOpenAI(
            api_version=os.environ["OPENAI_API_VERSION"],
            azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
            azure_ad_token_provider=token_provider,
        )
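Since Ollama and most self-hosted gateways speak the OpenAI wire protocol, every branch above can return the same client type. A minimal usage sketch of the ollama branch, assuming a local Ollama server on its default port; the model name is hypothetical:

import os

os.environ["MODEL_TYPE"] = "ollama"
os.environ["CUSTOM_MODEL_NAME"] = "llama3"  # hypothetical; any model pulled locally

client = getClient()
print(client.base_url)  # -> http://localhost:11434/v1/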


def getModel():
    model_type = os.environ["MODEL_TYPE"].lower()
    if model_type == "openai":
        return os.environ["OPENAI_MODEL_NAME"]
    elif model_type in ("ollama", "custom"):
        return os.environ["CUSTOM_MODEL_NAME"]
    else:
        return os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"]


# Main logic
def call_api(messages):
    client = getClient()
    model_name = getModel()
    response = client.chat.completions.create(
        model=model_name,
        messages=messages,
        temperature=0.0,
    )
    prediction = response.choices[0].message.content
    return prediction
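call_api() is backend-agnostic: it takes an OpenAI-style message list and returns the reply text. A usage sketch, assuming the environment variables above are set (for example via the demos' .env files) and the chosen backend is reachable:

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is 2 + 2?"},
]
print(call_api(messages))  # e.g. "4"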


class LLMMgr:
    @staticmethod
    def chat_completion(messages: Dict):
        llm_handle = os.environ.get("MODEL_TYPE")
        try:
            if llm_handle == "LLamaAML":
                # Code for calling SLMs
                return 0
            else:
                prediction = call_api(messages)
                return prediction
        except Exception as e:
            print(e)
            return "Sorry, I am not able to understand your query. Please try again."
            # raise GlueLLMException(f"Exception when calling {llm_handle.__class__.__name__} "
            #                        f"LLM in chat mode, with message {messages} ", e)


    @staticmethod
    def get_all_model_ids_of_type(llm_config: LLMConfig, llm_output_type: str):
@@ -97,6 +117,7 @@ def get_llm_pool(llm_config: LLMConfig) -> Dict[str, LLM]:
        install_lib_if_missing(InstallLibs.TIKTOKEN)

        import tiktoken

        # from llama_index.llms.azure_openai import AzureOpenAI
        from openai import AzureOpenAI
        from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
@@ -105,75 +126,85 @@ def get_llm_pool(llm_config: LLMConfig) -> Dict[str, LLM]:
        az_token_provider = None
        # if az_llm_config.use_azure_ad:
        from azure.identity import get_bearer_token_provider, AzureCliCredential

        az_token_provider = get_bearer_token_provider(
            AzureCliCredential(), "https://cognitiveservices.azure.com/.default"
        )

        for azure_oai_model in az_llm_config.azure_oai_models:
            callback_mgr = None
            if azure_oai_model.track_tokens:
                # If we need to count number of tokens used in LLM calls
                token_counter = TokenCountingHandler(
                    tokenizer=tiktoken.encoding_for_model(
                        azure_oai_model.model_name_in_azure
                    ).encode
                )
                callback_mgr = CallbackManager([token_counter])
                token_counter.reset_counts()

            if azure_oai_model.model_type in [
                LLMOutputTypes.CHAT,
                LLMOutputTypes.COMPLETION,
            ]:
                llm_pool[azure_oai_model.unique_model_id] = AzureOpenAI(
                    # use_azure_ad=az_llm_config.use_azure_ad,
                    azure_ad_token_provider=az_token_provider,
                    # model=azure_oai_model.model_name_in_azure,
                    # deployment_name=azure_oai_model.deployment_name_in_azure,
                    api_key=az_llm_config.api_key,
                    azure_endpoint=az_llm_config.azure_endpoint,
                    api_version=az_llm_config.api_version,
                    # callback_manager=callback_mgr
                )
            elif azure_oai_model.model_type == LLMOutputTypes.EMBEDDINGS:
                llm_pool[azure_oai_model.unique_model_id] = AzureOpenAIEmbedding(
                    use_azure_ad=az_llm_config.use_azure_ad,
                    azure_ad_token_provider=az_token_provider,
                    model=azure_oai_model.model_name_in_azure,
                    deployment_name=azure_oai_model.deployment_name_in_azure,
                    api_key=az_llm_config.api_key,
                    azure_endpoint=az_llm_config.azure_endpoint,
                    api_version=az_llm_config.api_version,
                    callback_manager=callback_mgr,
                )
            elif azure_oai_model.model_type == LLMOutputTypes.MULTI_MODAL:
                llm_pool[azure_oai_model.unique_model_id] = AzureOpenAIMultiModal(
                    use_azure_ad=az_llm_config.use_azure_ad,
                    azure_ad_token_provider=az_token_provider,
                    model=azure_oai_model.model_name_in_azure,
                    deployment_name=azure_oai_model.deployment_name_in_azure,
                    api_key=az_llm_config.api_key,
                    azure_endpoint=az_llm_config.azure_endpoint,
                    api_version=az_llm_config.api_version,
                    max_new_tokens=4096,
                )

        if llm_config.custom_models:
            for custom_model in llm_config.custom_models:
                # try:
                custom_llm_class = str_to_class(
                    custom_model.class_name, None, custom_model.path_to_py_file
                )

                callback_mgr = None
                if custom_model.track_tokens:
                    # If we need to count number of tokens used in LLM calls
                    token_counter = TokenCountingHandler(
                        tokenizer=custom_llm_class.get_tokenizer()
                    )
                    callback_mgr = CallbackManager([token_counter])
                    token_counter.reset_counts()
                llm_pool[custom_model.unique_model_id] = custom_llm_class(
                    callback_manager=callback_mgr
                )
                # except Exception as e:
                #     raise GlueLLMException(f"Custom model {custom_model.unique_model_id} not loaded.", e)
        return llm_pool

    @staticmethod
@@ -190,6 +221,6 @@ def get_tokens_used(llm_handle: LLM) -> Dict[str, int]:
                LLMLiterals.EMBEDDING_TOKEN_COUNT: token_counter.total_embedding_token_count,
                LLMLiterals.PROMPT_LLM_TOKEN_COUNT: token_counter.prompt_llm_token_count,
                LLMLiterals.COMPLETION_LLM_TOKEN_COUNT: token_counter.completion_llm_token_count,
                LLMLiterals.TOTAL_LLM_TOKEN_COUNT: token_counter.total_llm_token_count,
            }
        return None
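For handles created with track_tokens enabled, the counters accumulated by the TokenCountingHandler can be read back via get_tokens_used(); it returns None when no counter is attached. A hedged usage sketch; the config object and model id here are assumptions, not from this diff:

llm_pool = LLMMgr.get_llm_pool(llm_config)  # llm_config: an LLMConfig built elsewhere
llm = llm_pool["my_unique_model_id"]        # hypothetical unique_model_id
counts = LLMMgr.get_tokens_used(llm)
if counts is not None:
    print(counts[LLMLiterals.TOTAL_LLM_TOKEN_COUNT])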