diff --git a/.env.example b/.env.example
new file mode 100644
index 00000000..d52737ce
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,16 @@
+# API Keys for LLM Services
+GOOGLE_API_KEY=your_gemini_api_key_here
+OPENAI_API_KEY=your_openai_api_key_here
+
+# API Configuration
+USE_OPENAI_API_KEY=True # Set to True to use OpenAI, False to use Azure OpenAI
+OPENAI_MODEL_NAME=gpt-4 # Model name for OpenAI
+
+# Azure OpenAI Configuration (if USE_OPENAI_API_KEY is False)
+AZURE_OPENAI_ENDPOINT=https://your-resource-name.openai.azure.com/
+OPENAI_API_VERSION=2023-05-15
+AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=your_deployment_name
+
+# UI Configuration
+NEXT_PUBLIC_API_URL=http://localhost:5000 # URL of the API server
+NEXT_PUBLIC_DEFAULT_MODEL=Gemini # Default model to use
diff --git a/.gitignore b/.gitignore
index 8a30d258..0185b414 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,9 @@
+# Virtual Environment
+venv/
+
+# Node Modules
+ui/node_modules/
+
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
diff --git a/.vercelignore b/.vercelignore
new file mode 100644
index 00000000..54279388
--- /dev/null
+++ b/.vercelignore
@@ -0,0 +1,46 @@
+# Ignore files not needed for Vercel deployment
+
+# Python virtual environments
+venv/
+env/
+.env/
+
+# Development files
+.git/
+.github/
+.gitignore
+.vscode/
+.idea/
+
+# Large data files
+demos/
+data/
+
+# Documentation
+docs/
+*.md
+!README.md
+!VERCEL_DEPLOYMENT.md
+
+# Test files
+**/tests/
+**/__pycache__/
+**/*.pyc
+**/*.pyo
+**/*.pyd
+
+# Build artifacts
+**/dist/
+**/build/
+**/*.egg-info/
+
+# Node.js
+**/node_modules/
+**/.next/
+**/out/
+**/.cache/
+
+# Temporary files
+**/tmp/
+**/temp/
+**/.DS_Store
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 00000000..082b1943
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+ "makefile.configureOnOpen": false
+}
\ No newline at end of file
diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md
new file mode 100644
index 00000000..560cd41f
--- /dev/null
+++ b/DEPLOYMENT.md
@@ -0,0 +1,232 @@
+# PromptWizard Deployment Guide
+
+This guide provides instructions for deploying PromptWizard in various environments.
+
+## Table of Contents
+
+- [Local Development](#local-development)
+- [Docker Deployment](#docker-deployment)
+- [Cloud Deployment](#cloud-deployment)
+ - [Azure](#azure)
+ - [AWS](#aws)
+ - [Google Cloud](#google-cloud)
+- [Environment Variables](#environment-variables)
+- [Security Considerations](#security-considerations)
+- [Troubleshooting](#troubleshooting)
+
+## Local Development
+
+### Prerequisites
+
+- Python 3.8+
+- Node.js 18+
+- npm or yarn
+
+### API Setup
+
+1. Navigate to the API directory:
+ ```bash
+ cd api
+ ```
+
+2. Create a virtual environment:
+ ```bash
+ python -m venv venv
+ ```
+
+3. Activate the virtual environment:
+ - Windows: `venv\Scripts\activate`
+ - macOS/Linux: `source venv/bin/activate`
+
+4. Install dependencies:
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+5. Start the API server:
+ ```bash
+ python app.py
+ ```
+
+The API will be available at http://localhost:5000.
+
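+To confirm the server is up, you can query the health endpoint defined in `api/app.py` (the snippet below uses the `requests` package, which may need to be installed separately):
+
+```python
+import requests
+
+# Health endpoint exposed by api/app.py
+resp = requests.get("http://localhost:5000/api/health", timeout=10)
+resp.raise_for_status()
+print(resp.json())  # e.g. {'success': True, 'status': 'API is running', 'version': '1.0.0'}
+```
+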
+### UI Setup
+
+1. Navigate to the UI directory:
+ ```bash
+ cd ui
+ ```
+
+2. Install dependencies:
+ ```bash
+ npm install
+ ```
+
+3. Create a `.env.local` file with the following content:
+ ```
+ NEXT_PUBLIC_API_URL=http://localhost:5000
+ ```
+
+4. Start the development server:
+ ```bash
+ npm run dev
+ ```
+
+The UI will be available at http://localhost:3000.
+
+## Docker Deployment
+
+### Prerequisites
+
+- Docker
+- Docker Compose
+
+### Deployment Steps
+
+1. Clone the repository:
+ ```bash
+ git clone https://github.com/microsoft/PromptWizard.git
+ cd PromptWizard
+ ```
+
+2. Create a `.env` file in the root directory with your API keys:
+ ```
+ GOOGLE_API_KEY=your_gemini_api_key
+ OPENAI_API_KEY=your_openai_api_key
+ ```
+
+3. Build and start the containers:
+ ```bash
+ docker-compose up -d
+ ```
+
+4. Access the UI at http://localhost:3000
+
+### Individual Container Deployment
+
+If you prefer to deploy the API and UI separately:
+
+#### API Container
+
+```bash
+cd api
+docker build -t promptwizard-api .
+docker run -p 5000:5000 -e GOOGLE_API_KEY=your_key -e OPENAI_API_KEY=your_key promptwizard-api
+```
+
+#### UI Container
+
+```bash
+cd ui
+docker build -t promptwizard-ui .
+docker run -p 3000:3000 -e NEXT_PUBLIC_API_URL=http://localhost:5000 promptwizard-ui
+```
+
+## Cloud Deployment
+
+### Azure
+
+#### Azure App Service
+
+1. Create two App Services (one for API, one for UI)
+2. Deploy the API:
+ ```bash
+ cd api
+ az webapp up --sku B1 --name promptwizard-api
+ ```
+3. Deploy the UI:
+ ```bash
+ cd ui
+ az webapp up --sku B1 --name promptwizard-ui
+ ```
+4. Configure environment variables in the Azure Portal
+
+#### Azure Container Instances
+
+1. Build and push Docker images to Azure Container Registry
+2. Deploy containers using Azure CLI or Azure Portal
+3. Configure networking to allow communication between containers
+
+### AWS
+
+#### AWS Elastic Beanstalk
+
+1. Create two Elastic Beanstalk environments
+2. Deploy the API:
+ ```bash
+ cd api
+ eb init && eb create promptwizard-api
+ ```
+3. Deploy the UI:
+ ```bash
+ cd ui
+ eb init && eb create promptwizard-ui
+ ```
+4. Configure environment variables in the Elastic Beanstalk console
+
+#### AWS ECS
+
+1. Create an ECS cluster
+2. Define task definitions for API and UI
+3. Create services for each task
+4. Configure load balancers and networking
+
+### Google Cloud
+
+#### Google Cloud Run
+
+1. Build and push Docker images to Google Container Registry
+2. Deploy the API:
+ ```bash
+ cd api
+ gcloud run deploy promptwizard-api --image gcr.io/your-project/promptwizard-api
+ ```
+3. Deploy the UI:
+ ```bash
+ cd ui
+ gcloud run deploy promptwizard-ui --image gcr.io/your-project/promptwizard-ui
+ ```
+4. Configure environment variables in the Cloud Run console
+
+## Environment Variables
+
+### API Environment Variables
+
+- `GOOGLE_API_KEY`: API key for Google Gemini
+- `OPENAI_API_KEY`: API key for OpenAI
+- `FLASK_ENV`: Set to `production` for production deployment
+- `FLASK_APP`: Set to `app.py`
+
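+The Flask backend loads these values with python-dotenv at startup, so a local `.env` file behaves the same as real environment variables. A minimal sketch of what the API does:
+
+```python
+import os
+from dotenv import load_dotenv
+
+load_dotenv()  # reads .env from the current working directory, if present
+google_key = os.getenv("GOOGLE_API_KEY")
+openai_key = os.getenv("OPENAI_API_KEY")
+```
+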
+### UI Environment Variables
+
+- `NEXT_PUBLIC_API_URL`: URL of the PromptWizard API
+- `NEXT_PUBLIC_DEFAULT_MODEL`: Default model to use (Gemini, GPT-4, etc.)
+
+## Security Considerations
+
+1. **API Keys**: Never commit API keys to version control. Use environment variables or secrets management.
+2. **CORS**: The API has CORS enabled for the frontend. In production, restrict CORS to your frontend domain (see the sketch after this list).
+3. **Rate Limiting**: Consider implementing rate limiting to prevent abuse.
+4. **Input Validation**: The API applies basic validation to request parameters; review and harden it before exposing the service publicly.
+5. **HTTPS**: Always use HTTPS in production environments.
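+
+For item 2 above, here is a minimal sketch of restricting CORS in the Flask app; the origin is a placeholder for your actual frontend domain:
+
+```python
+from flask import Flask
+from flask_cors import CORS
+
+app = Flask(__name__)
+# Allow only the deployed frontend origin instead of the default wildcard
+CORS(app, origins=["https://your-frontend.example.com"])
+```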
+
+## Troubleshooting
+
+### Common Issues
+
+1. **API Connection Error**:
+ - Check if the API server is running
+ - Verify the `NEXT_PUBLIC_API_URL` environment variable
+ - Check network connectivity between UI and API
+
+2. **Model API Errors**:
+ - Verify API keys are correct
+ - Check if you have sufficient quota/credits
+ - Ensure the model is available in your region
+
+3. **Docker Issues**:
+ - Run `docker-compose logs` to view container logs
+ - Check if ports are correctly mapped
+ - Verify environment variables are set correctly
+
+For more help, please open an issue on the GitHub repository.
diff --git a/README.md b/README.md
index 5d622569..22e6da80 100644
--- a/README.md
+++ b/README.md
@@ -1,210 +1,334 @@
+# ✨ PromptWizard UI ✨
-# PromptWizard 🧙
-
-
+
-> **PromptWizard: Task-Aware Prompt Optimization Framework**
-> Eshaan Agarwal, Joykirat Singh, Vivek Dani, Raghav Magazine, Tanuja Ganu, Akshay Nambi
+> ### 🚀 **Welcome to PromptWizard UI!**
+>
+> This project is a modern web interface built on top of Microsoft's PromptWizard framework. Created in a single coding session, it provides an intuitive way to optimize prompts for various LLM tasks.
+>
+> ⚠️ **Note:** As this was developed rapidly, some features may not be fully implemented. Currently, the Gemini model integration has been tested and works reliably. Other models have code written but haven't been thoroughly tested.
+>
+> 🧪 **Quick Testing:** Use the "Test Values" button to auto-fill the form with sample data for image generation prompts and quickly test the functionality.
+>
+> 🤝 **Contributions Welcome:** Whether it's bug fixes, feature enhancements, or documentation improvements, all contributions are appreciated! This is an open project that welcomes community involvement.
-## Overview 🌟
-
Overview of the PromptWizard framework
-
+## Overview
-PromptWizard is a discrete prompt optimization framework that employs a self-evolving mechanism where the LLM generates, critiques, and refines its own prompts and examples, continuously improving through iterative feedback and synthesis. This self-adaptive approach ensures holistic optimization by evolving both the instructions and in-context learning examples for better task performance.
+PromptWizard UI provides a sleek web interface for optimizing prompts using the PromptWizard framework. It allows users to:
-Three key components of PromptWizard are te following :
+- Enter task descriptions and base instructions
+- Configure optimization parameters through an intuitive tabbed interface
+- Select models and datasets with visual feedback
+- Optimize prompts with a single click
+- Export optimized prompts and configurations
-- Feedback-driven Refinement: LLM generates, critiques, and refines its own prompts and examples, continuously improving through iterative feedback and synthesis
-- Critique and Synthesize diverse examples: Generates synthetic examples that are robust, diverse and task-aware. Also it optimizes both prompt and examples in tandem
-- Self generated Chain of Thought (CoT) steps with combination of positive, negative and synthetic examples
+## Quick Demo Video
-
Stage 1: Iterative optimization of instructions
-
+
+
+
+
+ Click to watch a quick demo of PromptWizard UI in action
-
Stage 2: Sequential optimization of instruction and examples
-
-
-
+## Test Values Feature
+
+The "Test Values" button allows you to quickly populate the form with pre-configured values for image generation prompts with Ghibli-style aesthetics.
+
+
+ Test Values Sequence Diagram
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant UI as PromptWizard UI
+ participant API as Backend API
+ participant LLM as Language Model (Gemini/GPT)
+
+ Note over User,LLM: Initial Setup Phase
+ User->>UI: Enter task description
+ User->>UI: Enter/select base instruction
+ User->>UI: Set mutation rounds (N)
+ User->>UI: Set refinement steps (M)
+ User->>UI: Click "Optimize Prompt"
+
+ UI->>API: Send optimization request
+
+ Note over API,LLM: Optimization Process Begins
+
+ API->>API: Initialize parameters
+
+ loop For each mutation round (1 to N)
+ Note over API,LLM: Mutation Phase
+ API->>LLM: Send current prompt for mutation
+ LLM->>API: Return mutated prompt
+
+ Note over API,LLM: Refinement Phase
+ loop For each refinement step (1 to M)
+ API->>LLM: Send mutated prompt for refinement
+ LLM->>API: Return refined prompt
+ end
+
+ API->>API: Update current prompt with refined version
+ end
+
+ API->>UI: Return final optimized prompt
+ UI->>User: Display optimized prompt
+```
+
+
+## Application Flow
+
+
+ Application Flow
+
+```mermaid
+sequenceDiagram
+ title PromptWizard Test Values Feature Flow
+
+ actor User
+ participant UI as PromptWizard UI
+ participant API as Backend API
+ participant LLM as Gemini API
+
+ User->>+UI: Clicks "Test Values" button
+
+ rect rgb(240, 240, 255)
+ Note over UI: Form Auto-Population Phase
+ UI->>UI: Fill task description with image generation prompt
+ UI->>UI: Fill base instruction with Ghibli-style aesthetics
+ UI->>UI: Set model to Gemini
+ UI->>UI: Set refine steps to 2
+ UI->>UI: Configure optimization parameters
+ UI->>UI: Set evaluation criteria
+ end
+
+ UI-->>-User: Display populated form
+
+ User->>UI: Reviews pre-filled values
+ User->>+UI: Enters API key
+ UI->>UI: Validate API key
+ UI-->>-User: Show validation result
+
+ User->>+UI: Clicks "Optimize Prompt" button
+
+ rect rgb(255, 240, 240)
+ Note over UI,API: Request Processing Phase
+ UI->>+API: Send optimization request with pre-filled values
+ API->>API: Parse and validate request
+ API->>API: Prepare optimization parameters
+ end
+
+ rect rgb(240, 255, 240)
+ Note over API,LLM: Optimization Phase
+ API->>+LLM: Send request to Gemini API
+ LLM->>LLM: Generate prompt variations
+ LLM->>LLM: Perform mutation rounds
+ LLM->>LLM: Evaluate variations
+ LLM->>LLM: Refine best prompts
+ LLM-->>-API: Return optimized prompt
+ end
+
+ rect rgb(255, 255, 240)
+ Note over API,UI: Result Processing Phase
+ API->>API: Process optimization results
+ API->>API: Calculate performance metrics
+ API->>-UI: Return optimization results
+ end
+
+ UI->>UI: Format results for display
+ UI-->>-User: Display optimized prompt and performance metrics
+
+ rect rgb(240, 255, 255)
+ Note over User,UI: Post-Optimization Actions
+ User->>+UI: Click "Copy to Clipboard"
+ UI-->>-User: Prompt copied to clipboard
+
+ alt Export Configuration
+ User->>+UI: Click "Export Config"
+ UI->>UI: Generate YAML configuration
+ UI-->>-User: Download configuration file
+ end
+ end
+
+ Note over User: Ready to use optimized prompt in actual applications
+```
+
+
+The optimization process follows these steps, sketched in code after the list:
+
+1. **User Input**: The user provides task description, base instruction, and configuration
+2. **API Processing**: The backend processes the request and prepares for optimization
+3. **LLM Interaction**: The system interacts with the selected LLM (Gemini/GPT-4)
+4. **Optimization Loop**: Multiple rounds of mutation and refinement occur
+5. **Result Generation**: The optimized prompt is generated and returned
+6. **UI Display**: Results are displayed to the user with evaluation metrics
+
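+In code, the loop behind steps 3 to 5 looks roughly like the sketch below; `llm` stands in for a call to the selected model, and the function is illustrative rather than the actual PromptWizard API:
+
+```python
+def optimize(base_prompt: str, mutation_rounds: int, refine_steps: int, llm) -> str:
+    """Illustrative sketch of the mutate-then-refine loop, not the real implementation."""
+    current = base_prompt
+    for _ in range(mutation_rounds):
+        # Mutation phase: ask the LLM for a variation of the current prompt
+        current = llm(f"Mutate this prompt while keeping the task intent:\n{current}")
+        # Refinement phase: iteratively tighten the mutated prompt
+        for _ in range(refine_steps):
+            current = llm(f"Refine this prompt for clarity and accuracy:\n{current}")
+    return current
+```
+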
+## Project Structure
+
+- `ui/` - Frontend Next.js application
+- `api/` - Backend Flask API
+
+## Getting Started 🚀
+
+### Prerequisites
+
+- Node.js (v18+)
+- Python (v3.8+)
+- API keys for LLMs (Gemini API key required for testing)
+
+### Installation
+
+1. Install frontend dependencies:
+ ```bash
+ cd ui
+ npm install
+ ```
+
+2. Install backend dependencies:
+ ```bash
+ cd api
+ pip install -r requirements.txt
+ ```
+
+3. Set up environment variables:
+ - Create a `.env` file in the `api/` directory
+ - Add your API keys:
+ ```
+ GOOGLE_API_KEY=your_gemini_api_key
+ OPENAI_API_KEY=your_openai_api_key
+ ```
+
+### Running the Application
+
+1. Start the backend API:
+ ```bash
+ cd api
+ python app.py
+ ```
+
+2. Start the frontend development server:
+ ```bash
+ cd ui
+ npm run dev
+ ```
+
+3. Open your browser and navigate to `http://localhost:3000`
+
+### Quick Testing with Test Values
+
+1. Open the application in your browser
+2. Click the "Test Values" button in the top-right corner
+3. Review the pre-filled form with sample values for image generation
+4. Enter your Gemini API key
+5. Click "Optimize Prompt" to test the functionality
+
+> **Note:** The Test Values feature is designed to work with the Gemini model, which has been thoroughly tested. Other models may require additional configuration.
+
+## Features
+
+### Prompt Input
+Enter task descriptions and base instructions for optimization.
+
+### Dataset Selection
+Choose from predefined datasets (GSM8k, SVAMP, AQUARAT, BBII) or use custom data.
+
+### Configuration Options
+- **Mutation Rounds**: Number of iterations for prompt mutation
+- **Refine Steps**: Number of refinement steps
+- **In-context Examples**: Option to use examples during optimization
+
+### Model Selection
+Choose between Gemini, GPT-4, or custom models.
+
+### Evaluation Metrics
+Select criteria for evaluating prompts:
+- Accuracy
+- Clarity
+- Completeness
+- Relevance
+- Conciseness
+
+### Export Options
+- Download optimized prompts as text files
+- Export configurations as YAML files
-## Installation ⬇️
+## Deployment 🌐
-Follow these steps to set up the development environment and install the package:
+### One-Click Vercel Deployment (Recommended)
-1) Clone the repository
- ```
- git clone https://github.com/microsoft/PromptWizard
- cd PromptWizard
- ```
-2) Create and activate a virtual environment
-
- On Windows
- ```
- python -m venv venv
- venv\Scripts\activate
- ```
- On macOS/Linux:
- ```
- python -m venv venv
- source venv/bin/activate
- ```
-3) Install the package in development mode:
- ```
- pip install -e .
- ```
-
-
-## Quickstart 🏃
-
-There are three main ways to use PromptWizard:
-- Scenario 1 : Optimizing prompts without examples
-- Scenario 2 : Generating synthetic examples and using them to optimize prompts
-- Scenario 3 : Optimizing prompts with training data
-
-**NOTE** : Refer this [notebook](demos/scenarios/dataset_scenarios_demo.ipynb) to get a detailed understanding of the usage for each of the scenarios. **This serves as a starting point to understand the usage of PromptWizard**
-
-#### High level overview of using PromptWizard
-- Decide your scenario
-- Fix the configuration and environmental varibles for API calling
- - Use ```promptopt_config.yaml``` to set configurations. For example for GSM8k this [file](demos/gsm8k/configs/promptopt_config.yaml) can be used
- - Use ```.env``` to set environmental varibles. For GSM8k this [file](demos/gsm8k/.env) can be used
- ```
- USE_OPENAI_API_KEY="XXXX"
- # Replace with True/False based on whether or not to use OPENAI API key
-
- # If the first variable is set to True then fill the following two
- OPENAI_API_KEY="XXXX"
- OPENAI_MODEL_NAME ="XXXX"
-
- # If the first variable is set to False then fill the following three
- AZURE_OPENAI_ENDPOINT="XXXXX"
- # Replace with your Azure OpenAI Endpoint
-
- OPENAI_API_VERSION="XXXX"
- # Replace with the version of your API
-
- AZURE_OPENAI_CHAT_DEPLOYMENT_NAME="XXXXX"
- # Create a deployment for the model and place the deployment name here.
- ```
-- Run the code
- - To run PromptWizard on your custom dataset please jump [here](#run-on-custom-dataset)
-
-#### Running PromptWizard with training data (Scenario 3)
-- We support [GSM8k](https://huggingface.co/datasets/openai/gsm8k), [SVAMP](https://huggingface.co/datasets/ChilleD/SVAMP), [AQUARAT](https://huggingface.co/datasets/deepmind/aqua_rat) and [Instruction_Induction(BBII)](https://github.com/xqlin98/INSTINCT/tree/main/Induction/experiments/data/instruction_induction/raw) datasets
-- Please note that time taken for prompt optimzation is dependent on the dataset. In our experiments for the above mentioned datasets, it took around 20 - 30 minutes on average.
-
-#### Running on GSM8k (AQUARAT/SVAMP)
-
-- Please note that this code requires access to LLMs via API calling for which we support AZURE endpoints or OPENAI keys
-- Set the AZURE endpoint configurations in [.env](demos/gsm8k/.env)
-- Follow the steps in [demo.ipynb](demos/gsm8k/demo.ipynb) to download the data, run the prompt optimization and carry out inference.
-
-#### Running on BBII
-
-- BBII has many datasets in it, based on the dataset set the configs [here](demos/bbh/configs/promptopt_config.yaml)
-- In configs ```task_description```,```base_instruction``` and ```answer_format``` need to be changed for different datasets in BBII, the rest of the configs remain the same
-- A demo is presented in [demo.ipynb](demos/bbh/demo.ipynb)
-
-
-
-## Run on Custom Datasets 🗃️
-
-### Create Custom Dataset
-- Our code expects the dataset to be in ```.jsonl``` file format
-- Both the train and test set follow the same format
-- Every sample in the ```.jsonl``` should have 2 fields :
- 1) ```question``` : It should contain the complete question that is to asked to the LLM
- 2) ```answer``` : It should contain the ground truth answer which can be verbose or concise
-
-
-### Run on Custom Dataset
-
-NOTE : Refer to [demos](demos) folder for examples of folders for four datasets. The ```.ipynb``` in each of the folders shows how to run PromptWizard on that particular dataset. A similar procedure can be followed for a new dataset. Below is the explanation of each of the components of the ```.ipynb``` and the dataset specifc folder structure in detail
-
-#### Steps to be followed for custom datasets
-
-1) Every new dataset needs to have the following
- - ```configs``` folder to store files for defining optimization hyperparameters and setup configs
- - ```data``` folder to store ```train.jsonl``` and ```test.jsonl``` as curated [here](#create-custom-dataset) (this is done in the notebooks)
- - ```.env``` file for environment varibles to be used for API calling
- - ```.py/.ipynb``` script to run the code
-
-2) Set the hyperparameters like number of mutations, refine steps, in-context examples etc.
- - Set the following in [promptopt_config.yaml](demos/gsm8k/configs/promptopt_config.yaml) :
- - ```task_description``` : Desciption of the task at hand which will be fed into the prompt
- - For GSM8k a description like the following can be used
- ```
- You are a mathematics expert. You will be given a mathematics problem which you need to solve
- ```
- - ```base_instruction``` : Base instruction in line with the dataset
- - A commonly used base instruction could be
- ```
- Lets think step by step.
- ```
- - ```answer_format``` : Instruction for specifying the answer format
- - It is crucial to set the ```answer_format``` properly to ensure correct extraction by ```def extract_final_answer()```
- - Answer format could be :
- ```
- At the end, wrap only your final option between and tags
- ```
- Then in ```def extract_final_answer()``` we can simply write code to extract string between the tags
-
- - ```seen_set_size``` : The number of train samples to be used for prompt optimization
- - In our experiments we set this to be 25. In general any number between 20-50 would work
- - ```few_shot_count``` : The number of in-context examples needed in the prompt
- - The value can be set to any positive integer based on the requirement
- - For generating zero-shot prompts, set the values to a small number (i.e between 2-5) and after the final prompt is generated the in-context examples can be removed. We suggest using some in-context examples as during the optimization process the instructions in the prompt are refined using in-context examples hence setting it to a small number will give better zero-shot instructions in the prompt
- - ```generate_reasoning``` : Whether or not to generate reasoning for the in-context examples
- - In our experiments we found it to improve the prompt overall as it provides a step-by-step approach to reach the final answer. However if there is a constraint on the prompt length or number of prompt tokens, it can be turned off to get smaller sized prompts
- - ```generate_expert_identity``` and ```generate_intent_keywords``` : Having these helped improve the prompt as they help making the prompt relevant to the task
- - Refer ```promptopt_config.yaml``` files in folders present [here](demos) for the descriptions used for AQUARAT, SVAMP and GSM8k. For BBII refer [description.py](demos/bbh/description.py) which has the meta instructions for each of the datasets
- - Following are the global parameters which can be set based on the availability of the training data
- - ```run_without_train_examples``` is a global hyperparameter which can be used when there are no training samples and in-context examples are not required in the final prompt
- - ```generate_synthetic_examples``` is a global hyperparameter which can be used when there are no training samples and we want to generate synthetic data for training
- - ```use_examples``` is a global hyperparameter which can be used to optimize prompts using training data
-3) Create a dataset specific class which inherits ```class DatasetSpecificProcessing``` similar to ```GSM8k(DatasetSpecificProcessing)``` in [demo.ipynb](demos/gsm8k/demo.ipynb) and define the following functions in it
- 1) In ```def extract_answer_from_output()``` : This is a dataset specific function, given the ```answer``` from the dataset it should extract and return a concise form of the answer. Note that based on the dataset it can also simply return the ```answer``` as it is like in case of SVAMP and AQUARAT datasets
- 2) ```def extract_final_answer()``` : This is a LLM output specific function, given the verbose answer from the LLM it should extract and return the concise final answer
- 3) Define ```def access_answer()``` : This function takes an input the LLM output, then does the following:
- - Extracts the concise answer using ```def extract_final_answer()``` from the LLM output as defined above
- - Evaluates the extracted answer with the ground truth and retuns
- - Extracted answer from LLM output
- - Boolean value indicating if answer is correct or not
- - The evaluation done here is dataset specific, for datasets like GSM8k, SVAMP and AQUARAT which have final answer as an number, we can do a direct match between the numbers generated and the ground truth, while for datasets where the answer is a sentence or paragraph it would be better to do evaluation with llm-as-a-judge, to compare the generated and ground truth paragraph/sentence. An example is available in ```def access_answer()``` in [this](demos/bbh/demo.ipynb) notebook
+The application is pre-configured for seamless deployment on Vercel:
+1. Push your code to a GitHub repository
+2. Connect the repository to Vercel
+3. Set up environment variables in the Vercel dashboard (see `.env.example` for all key names):
+ - `GOOGLE_API_KEY`: Your Gemini API key (required for testing)
+ - `OPENAI_API_KEY`: Your OpenAI API key (optional)
+4. Click "Deploy"
+
+> **Note:** For detailed Vercel deployment instructions, see our [Vercel Deployment Guide](VERCEL_DEPLOYMENT.md).
+
+### Other Deployment Options
+
+- **Docker**: Use our Docker configuration for containerized deployment
+- **Cloud Platforms**: Deploy to AWS, Azure, or Google Cloud
+- **Traditional Hosting**: Deploy to any platform that supports Node.js and Python
+
+For more deployment options, see our [Deployment Guide](DEPLOYMENT.md).
+
+## System Architecture 🏗️
+
+The PromptWizard UI system consists of three main components:
+
+1. **Frontend (Next.js)**: Provides the user interface for configuring and running prompt optimizations
+2. **Backend API (Flask)**: Handles requests from the frontend and communicates with the PromptWizard core
+3. **PromptWizard Core**: Microsoft's optimization engine that performs the actual prompt optimization
+
+### Data Flow Diagram
+
+## Integration with Microsoft PromptWizard 🔄
+
+This UI is built on top of Microsoft's PromptWizard framework, providing a user-friendly interface for prompt optimization. It leverages the powerful core functionality of PromptWizard while making it accessible to users without coding experience.
+
+### What Microsoft PromptWizard Provides:
+- Core prompt optimization algorithms
+- Training and evaluation logic
+- Dataset handling capabilities
+
+### What Our UI Adds:
+- Intuitive tabbed interface
+- Visual configuration of parameters
+- One-click optimization
+- Export and sharing capabilities
+- Quick testing with pre-configured values
+- Simplified deployment options
## How PromptWizard Works 🔍
-- Using the problem description and initial prompt instruction, PW generates variations of the instruction by prompting LLMs to mutate it. Based on performance, the best prompt is selected. PW incorporates a critique component that provides feedback, thus guiding and refining the prompt over multiple iterations.
+- Using the problem description and initial prompt instruction, PW generates variations of the instruction by prompting LLMs to mutate it. Based on performance, the best prompt is selected. PW incorporates a critique component that provides feedback, thus guiding and refining the prompt over multiple iterations.
- PW also optimizes in-context examples. PW selects a diverse set of examples
from the training data, identifying positive and negative examples based on their performance with
-the modified prompt. Negative examples help inform further prompt refinements.
-- Examples and instructions are sequentially optimized, using the critique to generate synthetic examples that address the current prompt’s weaknesses. These examples are integrated to further refine the prompt.
-- PW generates detailed reasoning chains via Chain-of-Thought (CoT), enriching the prompt’s capacity for problem-solving.
+the modified prompt. Negative examples help inform further prompt refinements.
+- Examples and instructions are sequentially optimized, using the critique to generate synthetic examples that address the current prompt’s weaknesses. These examples are integrated to further refine the prompt.
+- PW generates detailed reasoning chains via Chain-of-Thought (CoT), enriching the prompt’s capacity for problem-solving.
- PW aligns prompts with human reasoning by integrating task intent and expert
personas, enhancing both model performance and interpretability.
-## Configurations ⚙️
+## Configurations ⚙️
Here we define the various hyperparameters used in prompt optimization process found in [promptopt_config.yaml](demos/gsm8k/configs/promptopt_config.yaml)
- ```mutate_refine_iterations```: Number of iterations for conducting mutation of task description
followed by refinement of instructions
- ```mutation_rounds```: Number of rounds of mutation to be performed when generating different styles
-- ```refine_task_eg_iterations```: Number of iterations for refining task description and in context examples
+- ```refine_task_eg_iterations```: Number of iterations for refining task description and in context examples
- ```style_variation```: Number of thinking style variations to be used in prompt mutation
- ```questions_batch_size```: Number of questions to be asked to LLM in a single batch, during training step
- ```min_correct_count```: Minimum number of batches of questions to correctly answered, for a prompt to be considered as performing good
@@ -213,14 +337,55 @@ Here we define the various hyperparameters used in prompt optimization process f
- ```seen_set_size```: Number of samples from trainset to be used for training
- ```few_shot_count```: Number of in-context examples required in final prompt
+## Web UI Features 🖥️
+
+The PromptWizard Web UI provides a user-friendly interface for prompt optimization with the following features:
+
+### Tabbed Interface
+- **Basic Info**: Configure task description, base instruction, answer format, model, and API key
+- **Data Selection**: Choose datasets, configure in-context examples, and preview data
+- **Prompt Configuration**: Select optimization scenarios and configure advanced parameters
+- **Evaluation**: Set evaluation criteria and manage optimization sessions
+
+#### Basic Info Tab
+
+#### Data Selection Tab
+
+#### Prompt Configuration Tab
+
+#### Evaluation Tab
+
+### Advanced Features
+- **Advanced Optimization Parameters**: Fine-tune the optimization process with parameters like mutate refine iterations, refine task examples iterations, and more
+- **Advanced Evaluation Metrics**: Use metrics like Faithfulness, Semantic Similarity, Context Relevancy, and more
+- **Dataset Preview**: Visualize and inspect your dataset before optimization
+- **Multimodal Support**: Optimize prompts for image-based tasks with image uploads
+- **Session Management**: Save and load optimization sessions for later use
+- **Test Values Button**: ✨ Quickly populate the form with pre-configured values for image generation prompts to test functionality
+
+### Results Page
+
+The results page displays:
+- The optimized prompt
+- Performance metrics and evaluation scores
+- Comparison with the original prompt
+- Export options (copy to clipboard, download as text, export configuration)
+
+### Deployment Options
+- **Local Development**: Run the UI and API locally for development
+- **Docker Deployment**: Use Docker for containerized deployment ([see Docker instructions](DEPLOYMENT.md#docker-deployment))
+- **Vercel Deployment**: One-click deployment to Vercel ([see Vercel guide](VERCEL_DEPLOYMENT.md))
+- **Cloud Deployment**: Deploy to platforms like Azure, AWS, or GCP ([see Cloud instructions](DEPLOYMENT.md#cloud-deployment))
+
## Best Practices 💡
-Following are some of best pracitices we followed during are experiments
+Following are some of the best practices we followed during our experiments:
- Regarding the parameters in [promptopt_config.yaml](demos/gsm8k/configs/promptopt_config.yaml)
- We found the best performing values for ```mutate_refine_iterations```,```mutation_rounds```,```refine_task_eg_iterations``` to be 3 or 5
- Other parameters have been set to their ideal values. ```seen_set_size``` can be increased to 50 and ```few_shot_count``` can be set based on the use case
- The prompts generated at the end of the training process are usually very detailed, however user supervision can help tune it further for the task at hand
-- Trying both configurations of having synthetic in-context examples or in-context examples from the train set can be tried to find the best prompt based on use case.
+- Try both configurations, synthetic in-context examples and in-context examples from the train set, to find the best prompt for your use case.
+- When using the Web UI, the "Test Values" button provides a good starting point with pre-configured values for image generation prompts
## Results 📈
@@ -231,35 +396,57 @@ thresholds, maintaining the highest p(τ) values, indicating that it consistentl
possible accuracy across all tasks
-
- The fiqure shows the performance profile curve for the instruction induction
tasks. The performance profile curve visualizes how frequently
different approaches’ performance is within a given distance of the best performance. In this curve,
the x-axis (τ) represents the performance ratio relative to the best-performing method, and the y-axis
(p(τ )) reflects the fraction of tasks where a method’s performance is within this ratio. So for a given
-method, the curve tells what percentage of the tasks are within τ distance to the best performance.
+method, the curve tells what percentage of the tasks are within τ distance to the best performance.
+## Contributing 🤝
-## How to contribute: ✋
-This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.microsoft.com.
-When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repositories using our CLA.
-This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact opencode@microsoft.com with any additional questions or comments.
+### We'd Love Your Help!
-## Citation 📝
+This project was built in a single coding session, so there's plenty of room for improvement and expansion. Your contributions are not just welcome—they're essential to making this tool better for everyone!
+
+### Areas Where You Can Help:
-If you make use of our work, please cite our paper:
+- **Feature Implementation**: Help complete and test features for different LLM models
+- **UI Enhancements**: Improve the user interface and experience
+- **Documentation**: Enhance the documentation with examples and tutorials
+- **Bug Fixes**: Help identify and fix bugs in the codebase
+- **Testing**: Contribute to testing different features and models
+
+### How to Contribute:
+
+1. **Fork the repository**
+2. **Create a feature branch**: `git checkout -b feature/amazing-feature`
+3. **Commit your changes**: `git commit -m 'Add some amazing feature'`
+4. **Push to the branch**: `git push origin feature/amazing-feature`
+5. **Open a Pull Request**
+
+### Original PromptWizard Contribution Guidelines:
+
+This project builds on Microsoft's PromptWizard. For contributions to the core framework, please note that most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.microsoft.com.
+
+This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/).
+
+## Citation 📝
```
@misc{agarwal2024promptwizardtaskawarepromptoptimization,
- title={PromptWizard: Task-Aware Prompt Optimization Framework},
+ title={PromptWizard: Task-Aware Prompt Optimization Framework},
author={Eshaan Agarwal and Joykirat Singh and Vivek Dani and Raghav Magazine and Tanuja Ganu and Akshay Nambi},
year={2024},
eprint={2405.18369},
archivePrefix={arXiv},
primaryClass={cs.CL},
- url={https://arxiv.org/abs/2405.18369},
+ url={https://arxiv.org/abs/2405.18369},
}
```
-## Responsible AI Considerations
+## Responsible AI Considerations
For guidelines and best practices related to Responsible AI, please refer to our [Responsible AI Guidelines](RESPONSIBLE_AI.md).
diff --git a/VERCEL_DEPLOYMENT.md b/VERCEL_DEPLOYMENT.md
new file mode 100644
index 00000000..6139d529
--- /dev/null
+++ b/VERCEL_DEPLOYMENT.md
@@ -0,0 +1,130 @@
+# Deploying PromptWizard on Vercel
+
+This guide provides step-by-step instructions for deploying PromptWizard on Vercel.
+
+## Prerequisites
+
+- A [Vercel](https://vercel.com) account
+- A GitHub account (for connecting your repository)
+- API keys for the LLM services you plan to use (Gemini, OpenAI, etc.)
+
+## Deployment Steps
+
+### 1. Fork or Clone the Repository
+
+First, fork or clone the PromptWizard repository to your GitHub account.
+
+### 2. Connect to Vercel
+
+1. Log in to your Vercel account
+2. Click "Add New..." and select "Project"
+3. Import your GitHub repository
+4. Select the PromptWizard repository
+
+### 3. Configure Project Settings
+
+1. **Framework Preset**: Select "Other"
+2. **Root Directory**: Leave as is (should be the root of the repository)
+3. **Build Command**: Leave blank (defined in vercel.json)
+4. **Output Directory**: Leave blank (defined in vercel.json)
+
+### 4. Environment Variables
+
+Add the following environment variables:
+
+| Name | Value | Description |
+|------|-------|-------------|
+| `GOOGLE_API_KEY` | Your Gemini API key | Required for Gemini model |
+| `OPENAI_API_KEY` | Your OpenAI API key | Required for GPT-4 model |
+| `NEXT_PUBLIC_API_URL` | `/api` | API URL for the frontend |
+
+You can add these as plain text or as [Vercel Secrets](https://vercel.com/docs/concepts/projects/environment-variables#securing-environment-variables) for better security.
+
+### 5. Deploy
+
+Click "Deploy" and wait for the deployment to complete. Vercel will automatically build and deploy both the API and UI components based on the configuration in `vercel.json`.
+
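+Once the deployment finishes, a quick way to verify that the API half is live is to query the health endpoint (replace the URL with your own Vercel domain):
+
+```python
+import requests
+
+# Replace with your actual Vercel deployment URL
+resp = requests.get("https://your-project.vercel.app/api/health", timeout=30)
+print(resp.status_code, resp.json())
+```
+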
+## Vercel Configuration
+
+The `vercel.json` file in the repository root configures the deployment:
+
+```json
+{
+ "version": 2,
+ "builds": [
+ {
+ "src": "api/app.py",
+ "use": "@vercel/python"
+ },
+ {
+ "src": "ui/package.json",
+ "use": "@vercel/next"
+ }
+ ],
+ "routes": [
+ {
+ "src": "/api/(.*)",
+ "dest": "api/app.py"
+ },
+ {
+ "src": "/(.*)",
+ "dest": "ui/$1"
+ }
+ ]
+}
+```
+
+This configuration:
+- Builds the Python API using the Vercel Python runtime
+- Builds the Next.js UI using the Vercel Next.js runtime
+- Routes API requests to the Python backend
+- Routes all other requests to the Next.js frontend
+
+Environment variables are not defined in this file; they are configured separately in the Vercel dashboard, as described in step 4 above.
+
+## Vercel Serverless Functions Limitations
+
+Vercel serverless functions have some limitations to be aware of:
+
+1. **Execution Time**: Functions have a maximum execution time of 10 seconds on the Hobby plan and 60 seconds on the Pro plan. Prompt optimization can take longer than this.
+
+2. **Memory**: Functions are limited to 1GB of memory on the Hobby plan and 3GB on the Pro plan.
+
+3. **Cold Starts**: Serverless functions may experience cold starts, which can add latency to the first request after a period of inactivity.
+
+For production use with heavy optimization workloads, consider:
+- Upgrading to a Vercel Pro plan
+- Using a different deployment option like Docker on a VPS
+- Implementing a queue system for long-running tasks
+
+## Troubleshooting
+
+### API Connection Issues
+
+If the UI cannot connect to the API, check:
+1. The `NEXT_PUBLIC_API_URL` environment variable is set correctly
+2. The API routes in `vercel.json` are correct
+3. The API is successfully deployed (check Vercel logs)
+
+### Long-Running Operations
+
+If prompt optimization times out:
+1. Consider implementing a queue system for long-running tasks
+2. Break down the optimization process into smaller steps
+3. Use a different deployment option for production workloads
+
+### API Key Issues
+
+If you encounter API key errors:
+1. Verify the API keys are correctly set in the Vercel environment variables
+2. Check that the API keys have the necessary permissions
+3. Ensure you have sufficient quota/credits for the LLM services
+
+## Monitoring and Logs
+
+Vercel provides logs and monitoring for your deployment:
+1. Go to your project in the Vercel dashboard
+2. Click on "Deployments" to see all deployments
+3. Select a deployment to view its logs
+4. Use the "Functions" tab to see serverless function metrics
+
+For more detailed monitoring, consider integrating with services like Sentry or LogRocket.
diff --git a/api/Dockerfile b/api/Dockerfile
new file mode 100644
index 00000000..50dbbaac
--- /dev/null
+++ b/api/Dockerfile
@@ -0,0 +1,22 @@
+FROM python:3.9-slim
+
+WORKDIR /app
+
+# Copy requirements file
+COPY requirements.txt .
+
+# Install dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy application code
+COPY . .
+
+# Expose port
+EXPOSE 5000
+
+# Set environment variables
+ENV FLASK_APP=app.py
+ENV FLASK_ENV=production
+
+# Run the application with Gunicorn
+CMD ["gunicorn", "--bind", "0.0.0.0:5000", "app:app"]
diff --git a/api/README.md b/api/README.md
new file mode 100644
index 00000000..6c581fe1
--- /dev/null
+++ b/api/README.md
@@ -0,0 +1,127 @@
+# PromptWizard API 🚀
+
+Backend API for the PromptWizard UI.
+
+
+
+
+ PromptWizard API architecture and components
+
+
+## Overview
+
+This API provides endpoints for optimizing prompts using the PromptWizard framework. It's built with Flask and designed to work with the PromptWizard UI frontend.
+
+
+
+
+ Sequence diagram showing the interaction between UI, API, and LLM services
+
+
+## API Endpoints
+
+
+
+
+ Overview of PromptWizard API endpoints and their functions
+
+
+### `POST /api/optimize_prompt`
+
+Optimizes a prompt based on the provided parameters.
+
+
+
+
+ The prompt optimization process flow in the API
+
+
+**Request Body:**
+
+```json
+{
+ "taskDescription": "String - Description of the task",
+ "baseInstruction": "String - Initial prompt to optimize",
+ "answerFormat": "String - Desired output format",
+ "model": "String - Model to use (Gemini, GPT-4, etc.)",
+ "mutationRounds": "Number - Number of mutation rounds",
+ "refineSteps": "Number - Number of refinement steps",
+ "mutateRefineIterations": "Number - Number of iterations for mutation and refinement",
+ "refineTaskEgIterations": "Number - Number of iterations for refining task examples",
+ "refineInstruction": "Boolean - Whether to refine instructions after mutation",
+ "minCorrectCount": "Number - Minimum number of correct answers required",
+ "maxEvalBatches": "Number - Maximum number of evaluation batches",
+ "topN": "Number - Number of top prompts to consider",
+ "questionsBatchSize": "Number - Batch size for questions during training",
+ "useExamples": "Boolean - Whether to use in-context examples",
+ "generateSyntheticExamples": "Boolean - Whether to generate synthetic examples",
+ "generateExpertIdentity": "Boolean - Whether to generate expert identity",
+ "generateIntentKeywords": "Boolean - Whether to generate intent keywords",
+ "styleVariation": "Number - Number of style variations to generate",
+ "fewShotCount": "Number - Number of few-shot examples to include",
+ "dataset": "String - Dataset to use (GSM8k, SVAMP, etc.)",
+ "evaluationCriteria": "Array - Basic criteria for evaluation",
+ "advancedEvaluationMetrics": "Array - Advanced metrics for evaluation",
+ "enableMultimodal": "Boolean - Whether to enable multimodal support",
+ "saveSession": "Boolean - Whether to save the optimization session",
+ "sessionName": "String - Name for the saved session",
+ "apiKey": "String - API key for the selected model"
+}
+```
+
+**Response:**
+
+```json
+{
+ "success": true,
+ "optimizedPrompt": "String - The optimized prompt"
+}
+```
+
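+A minimal client call from Python is shown below; only a subset of the fields above is included, and the remaining parameters fall back to the server-side defaults:
+
+```python
+import requests
+
+payload = {
+    "taskDescription": "You are a mathematics expert. Solve the given problem.",
+    "baseInstruction": "Lets think step by step.",
+    "model": "Gemini",
+    "mutationRounds": 3,
+    "evaluationCriteria": ["Clarity", "Accuracy", "Completeness"],
+    "apiKey": "your_gemini_api_key",  # placeholder
+}
+
+resp = requests.post("http://localhost:5000/api/optimize_prompt", json=payload, timeout=600)
+data = resp.json()
+if data.get("success"):
+    print(data["optimizedPrompt"])
+else:
+    print("Error:", data.get("error"))
+```
+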
+## Getting Started
+
+### Prerequisites
+
+- Python 3.8+
+- API keys for LLMs (Gemini, OpenAI, etc.)
+
+### Installation
+
+1. Install dependencies:
+ ```
+ pip install -r requirements.txt
+ ```
+
+2. Set up environment variables:
+ - Create a `.env` file in the root directory
+ - Add your API keys:
+ ```
+ GOOGLE_API_KEY=your_gemini_api_key
+ OPENAI_API_KEY=your_openai_api_key
+ ```
+
+### Running the API
+
+```
+python app.py
+```
+
+The API will be available at `http://localhost:5000`.
+
+## Deployment
+
+The API is configured for deployment on Vercel using the Vercel Python runtime.
+
+## Error Handling
+
+The API returns appropriate HTTP status codes and error messages:
+
+- `200 OK`: Request successful
+- `400 Bad Request`: Invalid request parameters
+- `500 Internal Server Error`: Server-side error
+
+## Security Considerations
+
+- API keys are stored in environment variables
+- CORS is enabled for the frontend
+- Input validation is performed on all requests
diff --git a/api/app.py b/api/app.py
new file mode 100644
index 00000000..896fc66c
--- /dev/null
+++ b/api/app.py
@@ -0,0 +1,415 @@
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+import os
+import yaml
+import google.generativeai as genai
+from dotenv import load_dotenv
+from dataclasses import dataclass
+import logging
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+app = Flask(__name__)
+CORS(app) # Enable CORS for all routes
+
+# Load environment variables
+load_dotenv()
+
+# Configure global error handlers to ensure JSON responses
+@app.errorhandler(404)
+def not_found(e):
+ return jsonify({"success": False, "error": "Endpoint not found"}), 404
+
+@app.errorhandler(500)
+def server_error(e):
+ return jsonify({"success": False, "error": "Internal server error"}), 500
+
+@app.errorhandler(Exception)
+def handle_exception(e):
+ logger.error(f"Unhandled exception: {str(e)}")
+ return jsonify({
+ "success": False,
+ "error": str(e) if app.debug else "Internal server error"
+ }), 500
+
+# Health check endpoint
+@app.route('/api/health', methods=['GET'])
+def health_check():
+ return jsonify({
+ "success": True,
+ "status": "API is running",
+ "version": "1.0.0"
+ })
+
+# API key validation endpoint
+@app.route('/api/validate_key', methods=['POST'])
+def validate_key():
+ try:
+ data = request.json
+ api_key = data.get('apiKey', '')
+ model = data.get('model', 'Gemini')
+
+ if not api_key:
+ return jsonify({
+ "success": False,
+ "valid": False,
+ "message": "No API key provided"
+ })
+
+ # For Gemini model
+ if model == 'Gemini':
+ try:
+ # Try to initialize Gemini with the provided key
+ genai.configure(api_key=api_key)
+ model = genai.GenerativeModel('gemini-2.0-flash')
+
+ # Try a simple generation to validate the key
+ logger.info("Testing API key with a simple generation...")
+ response = model.generate_content("Hello, testing API key validation.")
+ logger.info(f"API key validation successful: {response.text[:30]}...")
+
+ return jsonify({
+ "success": True,
+ "valid": True,
+ "message": "API key is valid"
+ })
+ except Exception as e:
+ error_message = str(e)
+ logger.error(f"API key validation failed: {error_message}")
+
+ if "API key not valid" in error_message:
+ message = "Invalid API key"
+ else:
+ message = f"Error validating API key: {error_message}"
+
+ return jsonify({
+ "success": False,
+ "valid": False,
+ "message": message
+ })
+
+ # For other models (mock validation for now)
+ return jsonify({
+ "success": True,
+ "valid": True,
+ "message": f"API key validation for {model} is not implemented yet. Assuming valid."
+ })
+
+ except Exception as e:
+ logger.error(f"Error in validate_key endpoint: {str(e)}")
+ return jsonify({
+ "success": False,
+ "valid": False,
+ "message": f"Error: {str(e)}"
+ }), 500
+
+@dataclass
+class PromptOptimizationParams:
+ task_description: str
+ base_instruction: str
+ answer_format: str
+ max_iterations: int
+ evaluation_criteria: list
+ temperature: float
+ mutate_refine_iterations: int = 3
+ refine_task_eg_iterations: int = 3
+ refine_instruction: bool = True
+ min_correct_count: int = 3
+ max_eval_batches: int = 6
+ top_n: int = 1
+ questions_batch_size: int = 1
+ generate_expert_identity: bool = True
+ generate_intent_keywords: bool = False
+ style_variation: int = 5
+ few_shot_count: int = 5
+ advanced_evaluation_metrics: list = None
+
+class PromptOptimizer:
+ def __init__(self, setup_config, task_description):
+ self.setup_config = setup_config
+ self.task_description = task_description
+
+ # Initialize Gemini
+ api_key = os.getenv("GOOGLE_API_KEY")
+ logger.info(f"API key from environment: {'VALID' if api_key and api_key != 'your-gemini-api-key' else 'NOT VALID OR MISSING'}")
+
+ if not api_key or api_key == "your-gemini-api-key":
+ logger.warning("GOOGLE_API_KEY not found or not valid in environment variables")
+ self.use_mock = True
+ else:
+ try:
+ # Configure the API
+ genai.configure(api_key=api_key)
+
+ # Test the API with a simple request
+ logger.info("Testing Gemini API connection...")
+ self.model = genai.GenerativeModel('gemini-2.0-flash')
+ test_response = self.model.generate_content("Hello, testing the API connection.")
+ logger.info(f"API test successful: {test_response.text[:30]}...")
+
+ self.use_mock = False
+ logger.info("Using REAL Gemini API for optimization")
+ except Exception as e:
+ import traceback
+ error_traceback = traceback.format_exc()
+ logger.error(f"Error initializing Gemini: {str(e)}\n{error_traceback}")
+ self.use_mock = True
+ logger.warning("Falling back to MOCK implementation due to API initialization error")
+
+ def optimize_prompt(self, base_prompt, criteria):
+ if self.use_mock:
+ # Return a mock optimized prompt for testing
+ logger.info(f"Using MOCK optimization (no valid API key) for: {base_prompt[:50]}...")
+
+ # Create a simple mock optimization by adding some text
+ optimized = base_prompt + "\n\nAdditional instructions: Please ensure all responses are clear, concise, and directly address the query. Maintain a helpful and informative tone throughout."
+
+ # Log a warning about using mock implementation
+ logger.warning("USING MOCK IMPLEMENTATION: No valid API key provided. To use the actual Gemini API, please provide a valid API key in the .env file or through the UI.")
+ return optimized
+ else:
+ # Use the actual Gemini model
+ try:
+ chat = self.model.start_chat(history=[])
+
+ optimization_prompt = f"""
+ Task: {self.task_description}
+ Base prompt: {base_prompt}
+ Evaluation criteria: {', '.join(criteria)}
+
+ Please optimize this prompt to better meet the evaluation criteria.
+ Return only the optimized prompt without any explanations.
+ """
+
+ logger.info(f"Sending optimization request to Gemini with prompt: {optimization_prompt[:100]}...")
+ response = chat.send_message(optimization_prompt)
+ logger.info(f"Received response from Gemini: {response.text[:100]}...")
+ return response.text
+ except Exception as e:
+ import traceback
+ error_traceback = traceback.format_exc()
+ logger.error(f"Error in optimize_prompt: {str(e)}\n{error_traceback}")
+ # Fallback to mock response with error details
+ return base_prompt + f"\n\n[Error occurred during optimization: {str(e)}]"
+
+ def get_best_prompt(self, params, use_examples=False, run_without_train_examples=True, generate_synthetic_examples=False):
+ current_prompt = params.base_instruction
+
+ logger.info(f"Starting prompt optimization with {params.max_iterations} iterations")
+ logger.info(f"Task: {params.task_description}")
+ logger.info(f"Advanced options: generate_expert_identity={params.generate_expert_identity}, generate_intent_keywords={params.generate_intent_keywords}")
+ logger.info(f"Examples options: use_examples={use_examples}, run_without_train_examples={run_without_train_examples}, generate_synthetic_examples={generate_synthetic_examples}")
+
+ # If generate_expert_identity is enabled, add expert profile to the prompt
+ if params.generate_expert_identity:
+ expert_profile = self.generate_expert_profile(params.task_description)
+ if not self.use_mock:
+ current_prompt = f"You are an expert in {params.task_description}.\n{expert_profile}\n\n{current_prompt}"
+ logger.info("Added expert profile to prompt")
+
+ # If generate_intent_keywords is enabled, add keywords to the prompt
+ if params.generate_intent_keywords:
+ keywords = self.generate_keywords(params.task_description)
+ if not self.use_mock:
+ current_prompt = f"{current_prompt}\n\nKeywords: {keywords}"
+ logger.info("Added intent keywords to prompt")
+
+ for i in range(params.max_iterations):
+ logger.info(f"Iteration {i+1}/{params.max_iterations}")
+ optimized_prompt = self.optimize_prompt(
+ current_prompt,
+ params.evaluation_criteria
+ )
+ current_prompt = optimized_prompt
+
+ # Return a tuple with the optimized prompt and None to match the expected return format
+ return current_prompt, None
+
+ def generate_expert_profile(self, task_description):
+ """Generate an expert profile based on the task description"""
+ if self.use_mock:
+ return "Expert in the field with extensive knowledge and experience."
+
+ try:
+ prompt = f"""
+ Generate a detailed expert profile for someone who is highly skilled at: {task_description}
+ The profile should describe their expertise, background, and skills.
+ Keep it to 2-3 sentences maximum.
+ """
+
+ response = self.model.generate_content(prompt)
+ return response.text.strip()
+ except Exception as e:
+ logger.error(f"Error generating expert profile: {str(e)}")
+ return "Expert in the field with extensive knowledge and experience."
+
+ def generate_keywords(self, task_description):
+ """Generate keywords based on the task description"""
+ if self.use_mock:
+ return "expertise, knowledge, skills, professional"
+
+ try:
+ prompt = f"""
+ Generate 5-7 keywords that capture the essence of this task: {task_description}
+ Return only the keywords separated by commas.
+ """
+
+ response = self.model.generate_content(prompt)
+ return response.text.strip()
+ except Exception as e:
+ logger.error(f"Error generating keywords: {str(e)}")
+ return "expertise, knowledge, skills, professional"
+
+@app.route('/api/optimize_prompt', methods=['POST'])
+def optimize_prompt():
+ try:
+ data = request.json
+ logger.info(f"Received optimization request: {data}")
+
+ # Extract parameters from request
+ task_description = data.get('taskDescription', '')
+ base_instruction = data.get('baseInstruction', '')
+ answer_format = data.get('answerFormat', '')
+ model = data.get('model', 'Gemini')
+ mutation_rounds = int(data.get('mutationRounds', 3))
+ refine_steps = int(data.get('refineSteps', 2))
+ mutate_refine_iterations = int(data.get('mutateRefineIterations', 3))
+ refine_task_eg_iterations = int(data.get('refineTaskEgIterations', 3))
+ refine_instruction = data.get('refineInstruction', True)
+ min_correct_count = int(data.get('minCorrectCount', 3))
+ max_eval_batches = int(data.get('maxEvalBatches', 6))
+ top_n = int(data.get('topN', 1))
+ questions_batch_size = int(data.get('questionsBatchSize', 1))
+ evaluation_criteria = data.get('evaluationCriteria', [])
+ advanced_evaluation_metrics = data.get('advancedEvaluationMetrics', [])
+ dataset = data.get('dataset', 'Custom')
+ custom_dataset = data.get('customDataset')
+ use_examples = data.get('useExamples', False)
+ generate_synthetic_examples = data.get('generateSyntheticExamples', False)
+ generate_expert_identity = data.get('generateExpertIdentity', True)
+ generate_intent_keywords = data.get('generateIntentKeywords', False)
+ style_variation = int(data.get('styleVariation', 5))
+ few_shot_count = int(data.get('fewShotCount', 5))
+ enable_multimodal = data.get('enableMultimodal', False)
+ save_session = data.get('saveSession', False)
+ session_name = data.get('sessionName', '')
+
+ # Log dataset information
+ if dataset == 'Custom' and custom_dataset:
+ logger.info(f"Custom dataset provided with {len(custom_dataset)} examples")
+ else:
+ logger.info(f"Using predefined dataset: {dataset}")
+
+ # If no evaluation criteria provided, use default ones
+ if not evaluation_criteria:
+ evaluation_criteria = ["Clarity", "Accuracy", "Completeness"]
+
+ # Create a simple setup config
+ setup_config = {
+ 'llm': {
+ 'model_type': model,
+ 'temperature': 0.0
+ }
+ }
+
+ # Initialize parameters
+ params = PromptOptimizationParams(
+ task_description=task_description,
+ base_instruction=base_instruction,
+ answer_format=answer_format,
+ max_iterations=mutation_rounds,
+ evaluation_criteria=evaluation_criteria,
+ temperature=0.0,
+ mutate_refine_iterations=mutate_refine_iterations,
+ refine_task_eg_iterations=refine_task_eg_iterations,
+ refine_instruction=refine_instruction,
+ min_correct_count=min_correct_count,
+ max_eval_batches=max_eval_batches,
+ top_n=top_n,
+ questions_batch_size=questions_batch_size,
+ generate_expert_identity=generate_expert_identity,
+ generate_intent_keywords=generate_intent_keywords,
+ style_variation=style_variation,
+ few_shot_count=few_shot_count,
+ advanced_evaluation_metrics=advanced_evaluation_metrics
+ )
+
+ # Get API key from request or environment
+ api_key = data.get('apiKey')
+ if api_key:
+ # If API key is provided in the request, temporarily set it in the environment
+ logger.info("Using API key from request")
+ os.environ["GOOGLE_API_KEY"] = api_key
+ else:
+ logger.info("No API key in request, using environment variable if available")
+
+ # Initialize optimizer
+ optimizer = PromptOptimizer(
+ setup_config=setup_config,
+ task_description=task_description
+ )
+
+ # Add custom dataset if provided
+ examples = None
+ if dataset == 'Custom' and custom_dataset and use_examples:
+ try:
+ # Convert custom dataset to the format expected by the optimizer
+ examples = []
+ for item in custom_dataset:
+ if 'input' in item and 'output' in item:
+ examples.append({
+ 'question': item['input'],
+ 'answer': item['output']
+ })
+ logger.info(f"Using {len(examples)} examples from custom dataset")
+ except Exception as e:
+ logger.error(f"Error processing custom dataset: {str(e)}")
+ examples = None
+
+ # Run optimization
+ optimized_prompt, _ = optimizer.get_best_prompt(
+ params,
+ use_examples=use_examples,
+ run_without_train_examples=(not use_examples),
+ generate_synthetic_examples=generate_synthetic_examples
+ )
+
+ return jsonify({
+ 'success': True,
+ 'optimizedPrompt': optimized_prompt
+ })
+
+ except Exception as e:
+ import traceback
+ error_traceback = traceback.format_exc()
+ logger.error(f"Error in optimize_prompt endpoint: {str(e)}\n{error_traceback}")
+
+ # Return a more user-friendly error message
+ error_message = str(e)
+ if "not found in environment variables" in error_message:
+ error_message = "API key not configured. Please check your API key."
+
+ return jsonify({
+ 'success': False,
+ 'error': error_message,
+ 'details': error_traceback if app.debug else None
+ }), 500
+
+if __name__ == '__main__':
+ # Set up proper error handling for production
+ if os.environ.get('FLASK_ENV') == 'production':
+ app.config['DEBUG'] = False
+ app.config['PROPAGATE_EXCEPTIONS'] = False
+ else:
+ app.config['DEBUG'] = True
+ app.config['PROPAGATE_EXCEPTIONS'] = True
+
+ # Run the app
+ # Try port 5000 first, fallback to 5001 if that's in use
+ try:
+ app.run(debug=app.config['DEBUG'], port=5000)
+ except OSError:
+ print("Port 5000 is in use, trying port 5001...")
+ app.run(debug=app.config['DEBUG'], port=5001)
diff --git a/api/requirements.txt b/api/requirements.txt
new file mode 100644
index 00000000..5eea3b5e
--- /dev/null
+++ b/api/requirements.txt
@@ -0,0 +1,14 @@
+flask==2.3.3
+flask-cors==4.0.0
+python-dotenv==1.0.0
+pyyaml==6.0.1
+google-generativeai==0.3.1
+requests==2.31.0
+openai>=1.14.0
+tiktoken==0.5.2
+nltk
+datasets==2.16.0
+pyarrow==15.0.2
+llama-index==0.11.10
+llama-index-core==0.11.10
+gunicorn==21.2.0
diff --git a/api/test_api.py b/api/test_api.py
new file mode 100644
index 00000000..8b98c021
--- /dev/null
+++ b/api/test_api.py
@@ -0,0 +1,36 @@
+import requests
+import json
+
+def test_optimize_prompt_api():
+ """Test the optimize_prompt API endpoint."""
+ url = "http://localhost:5000/api/optimize_prompt"
+
+ # Test data
+ data = {
+ "taskDescription": "Test task description",
+ "baseInstruction": "This is a test prompt that needs optimization.",
+ "answerFormat": "JSON",
+ "model": "Gemini",
+ "mutationRounds": 2,
+ "refineSteps": 1,
+ "useExamples": False,
+ "dataset": "Custom",
+ "evaluationCriteria": ["Clarity", "Accuracy"]
+ }
+
+ # Send request
+ print("Sending request to API...")
+ response = requests.post(url, json=data)
+
+ # Print results
+ print(f"Status code: {response.status_code}")
+ if response.status_code == 200:
+ result = response.json()
+ print("Success:", result["success"])
+ print("\nOptimized prompt:")
+ print(result["optimizedPrompt"])
+ else:
+ print("Error:", response.text)
+
+if __name__ == "__main__":
+ test_optimize_prompt_api()
diff --git a/configs/llm_config.yaml b/configs/llm_config.yaml
new file mode 100644
index 00000000..88b67e6e
--- /dev/null
+++ b/configs/llm_config.yaml
@@ -0,0 +1,28 @@
+azure_open_ai:
+ use_azure_ad: true
+ api_key: ""
+ azure_endpoint: ""
+ api_version: ""
+ models:
+ - unique_model_id: "gpt-4"
+ model_name_in_azure: "gpt-4"
+ deployment_name_in_azure: "gpt-4"
+ model_type: "chat"
+
+gemini:
+ api_key: "${GOOGLE_API_KEY}"
+ temperature: 0.0
+ max_tokens: 1024
+ models:
+ - unique_model_id: "gemini-flash"
+ model_name: "gemini-2.0-flash"
+ model_type: "chat"
+ - unique_model_id: "gemini-flash-vision"
+ model_name: "gemini-2.0-flash-vision"
+ model_type: "multi_modal"
+
+custom_models:
+ - unique_model_id: "custom-model-1"
+ class_name: "CustomLLMClass"
+ path_to_py_file: "path/to/custom_llm.py"
+ track_tokens: true
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..ba0ae59e
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,27 @@
+version: '3.8'
+
+services:
+ api:
+ build:
+ context: ./api
+ dockerfile: Dockerfile
+ ports:
+ - "5000:5000"
+ environment:
+ - FLASK_APP=app.py
+ - FLASK_ENV=production
+ volumes:
+ - ./api:/app
+ restart: unless-stopped
+
+ ui:
+ build:
+ context: ./ui
+ dockerfile: Dockerfile
+ ports:
+ - "3000:3000"
+ environment:
+ - NEXT_PUBLIC_API_URL=http://api:5000
+ depends_on:
+ - api
+ restart: unless-stopped
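+
+# Typical usage from the repository root (standard Docker Compose commands,
+# shown here for convenience):
+#   docker compose up --build -d   # build the images and start both services
+#   docker compose down            # stop and remove the containers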
diff --git a/images/README.md b/images/README.md
new file mode 100644
index 00000000..a10789b4
--- /dev/null
+++ b/images/README.md
@@ -0,0 +1,110 @@
+# PromptWizard UI and API Images
+
+This directory contains images for the PromptWizard documentation. Below are descriptions of each image and what they should represent.
+
+## UI Images
+
+### promptwizard_banner.png
+A banner image for the top of the README with the PromptWizard logo and a visually appealing background.
+
+### promptwizard_ui_showcase.png
+A screenshot of the PromptWizard UI showing the tabbed interface with all the new features. This should be a high-quality screenshot of the actual UI.
+
+### ui_workflow.png
+A flowchart showing the workflow of using the PromptWizard UI, from entering the task description to getting the optimized prompt.
+
+### ui_screenshot.png
+A full screenshot of the main UI page showing the tabbed interface.
+
+### ui_basic_info.png
+A screenshot of the Basic Info tab showing the task description, base instruction, model, and API key fields.
+
+### ui_data_selection.png
+A screenshot of the Data Selection tab showing dataset selection and in-context examples configuration.
+
+### ui_prompt_config.png
+A screenshot of the Prompt Configuration tab showing scenario selection and advanced parameters.
+
+### ui_evaluation.png
+A screenshot of the Evaluation tab showing evaluation criteria and session management.
+
+### ui_results.png
+A screenshot of the results page showing the optimized prompt and performance metrics.
+
+### feature_comparison.png
+A comparison chart showing the features available in different optimization scenarios (Scenario 1, 2, and 3).
+
+### deployment_options.png
+A diagram showing the different deployment options for PromptWizard (local, Docker, cloud).
+
+### ui_features_diagram.png
+A diagram showing the relationships between different features in the UI.
+
+### tabs_workflow.png
+A flowchart showing how users navigate between the different tabs in the UI.
+
+### optimization_process.png
+A diagram showing the prompt optimization process flow from the UI perspective.
+
+### getting_started.png
+An illustration for the Getting Started section showing the setup process.
+
+### contributing.png
+An illustration for the Contributing section showing collaboration.
+
+### video_thumbnail.png
+A thumbnail image for the demo video with a play button overlay.
+
+### test_values_sequence.png
+A sequence diagram showing the flow of the Test Values feature, from clicking the button to displaying results.
+
+## API Images
+
+### api_architecture.png
+A diagram showing the architecture of the PromptWizard API and its components.
+
+### api_sequence_diagram.png
+A sequence diagram showing the interaction between the UI, API, and LLM services.
+
+### api_endpoints.png
+A diagram showing the different API endpoints and their functions.
+
+### optimize_prompt_flow.png
+A flowchart showing the process flow of the optimize_prompt endpoint.
+
+### health_endpoint.png
+A simple diagram showing the health check endpoint flow.
+
+## System Architecture Diagrams
+
+### system_architecture.png
+A comprehensive diagram showing the complete system architecture with all components and their interactions.
+
+### data_flow.png
+A diagram showing the data flow between components during the optimization process.
+
+### sequence_diagram.png
+A detailed sequence diagram showing the complete flow from user input to optimized prompt.
+
+### promptwizard_workflow.png
+A diagram illustrating the core workflow of Microsoft's PromptWizard framework.
+
+## Creating the Images
+
+You can create these images using:
+1. Screenshots of the actual UI
+2. Diagram tools like draw.io, Lucidchart, or Mermaid
+3. Design tools like Figma, Sketch, or Adobe Illustrator
+
+Please use a consistent style and color scheme across all images:
+- Primary color: #4F46E5 (indigo)
+- Secondary color: #8B5CF6 (purple)
+- Accent color: #EC4899 (pink)
+- Background: White or light gray
+- Text: Dark gray or black
+
+## Image Specifications
+
+- Format: PNG with transparent background where appropriate
+- Resolution: At least 1200px wide
+- Aspect ratio: Maintain a consistent aspect ratio across similar types of images
diff --git a/images/sequenceDiagram1.txt b/images/sequenceDiagram1.txt
new file mode 100644
index 00000000..4a76938a
--- /dev/null
+++ b/images/sequenceDiagram1.txt
@@ -0,0 +1,35 @@
+sequenceDiagram
+ participant User
+ participant UI as PromptWizard UI
+ participant API as Backend API
+ participant LLM as Language Model (Gemini/GPT)
+
+ Note over User,LLM: Initial Setup Phase
+ User->>UI: Enter task description
+ User->>UI: Enter/select base instruction
+ User->>UI: Set mutation rounds (N)
+ User->>UI: Set refinement steps (M)
+ User->>UI: Click "Optimize Prompt"
+
+ UI->>API: Send optimization request
+
+ Note over API,LLM: Optimization Process Begins
+
+ API->>API: Initialize parameters
+
+ loop For each mutation round (1 to N)
+ Note over API,LLM: Mutation Phase
+ API->>LLM: Send current prompt for mutation
+ LLM->>API: Return mutated prompt
+
+ Note over API,LLM: Refinement Phase
+ loop For each refinement step (1 to M)
+ API->>LLM: Send mutated prompt for refinement
+ LLM->>API: Return refined prompt
+ end
+
+ API->>API: Update current prompt with refined version
+ end
+
+ API->>UI: Return final optimized prompt
+ UI->>User: Display optimized prompt
\ No newline at end of file
diff --git a/images/sequenceDiagram2.txt b/images/sequenceDiagram2.txt
new file mode 100644
index 00000000..054e605d
--- /dev/null
+++ b/images/sequenceDiagram2.txt
@@ -0,0 +1,69 @@
+sequenceDiagram
+ title PromptWizard Test Values Feature Flow
+
+ actor User
+ participant UI as PromptWizard UI
+ participant API as Backend API
+ participant LLM as Gemini API
+
+ User->>+UI: Clicks "Test Values" button
+
+ rect rgb(240, 240, 255)
+ Note over UI: Form Auto-Population Phase
+ UI->>UI: Fill task description with image generation prompt
+ UI->>UI: Fill base instruction with Ghibli-style aesthetics
+ UI->>UI: Set model to Gemini
+ UI->>UI: Set refine steps to 2
+ UI->>UI: Configure optimization parameters
+ UI->>UI: Set evaluation criteria
+ end
+
+ UI-->>-User: Display populated form
+
+ User->>UI: Reviews pre-filled values
+ User->>+UI: Enters API key
+ UI->>UI: Validate API key
+ UI-->>-User: Show validation result
+
+ User->>+UI: Clicks "Optimize Prompt" button
+
+ rect rgb(255, 240, 240)
+ Note over UI,API: Request Processing Phase
+ UI->>+API: Send optimization request with pre-filled values
+ API->>API: Parse and validate request
+ API->>API: Prepare optimization parameters
+ end
+
+ rect rgb(240, 255, 240)
+ Note over API,LLM: Optimization Phase
+ API->>+LLM: Send request to Gemini API
+ LLM->>LLM: Generate prompt variations
+ LLM->>LLM: Perform mutation rounds
+ LLM->>LLM: Evaluate variations
+ LLM->>LLM: Refine best prompts
+ LLM-->>-API: Return optimized prompt
+ end
+
+ rect rgb(255, 255, 240)
+ Note over API,UI: Result Processing Phase
+ API->>API: Process optimization results
+ API->>API: Calculate performance metrics
+ API->>-UI: Return optimization results
+ end
+
+ UI->>UI: Format results for display
+ UI-->>-User: Display optimized prompt and performance metrics
+
+ rect rgb(240, 255, 255)
+ Note over User,UI: Post-Optimization Actions
+ User->>+UI: Click "Copy to Clipboard"
+ UI-->>-User: Prompt copied to clipboard
+
+ alt Export Configuration
+ User->>+UI: Click "Export Config"
+ UI->>UI: Generate YAML configuration
+ UI-->>-User: Download configuration file
+ end
+ end
+
+ Note over User: Ready to use optimized prompt in actual applications
\ No newline at end of file
diff --git a/images/test_values_sequence.txt b/images/test_values_sequence.txt
new file mode 100644
index 00000000..e700f264
--- /dev/null
+++ b/images/test_values_sequence.txt
@@ -0,0 +1,35 @@
+sequenceDiagram
+ title Test Values Feature Flow
+
+ actor User
+ participant UI as PromptWizard UI
+ participant API as Backend API
+ participant LLM as Gemini API
+
+ User->>UI: Clicks "Test Values" button
+
+ Note over UI: Form is populated with pre-configured values
+
+ UI->>UI: Fill task description with image generation prompt
+ UI->>UI: Fill base instruction with Ghibli-style aesthetics
+ UI->>UI: Set model to Gemini
+ UI->>UI: Configure optimization parameters
+
+ User->>UI: Reviews pre-filled values
+ User->>UI: Enters API key
+ User->>UI: Clicks "Optimize Prompt" button
+
+ UI->>API: Send optimization request
+
+ Note over API: Process request and prepare for optimization
+
+ API->>LLM: Send request to Gemini API
+
+    Note over LLM: Generate variations<br/>Perform mutation rounds<br/>Refine prompt
+
+ LLM->>API: Return optimized prompt
+ API->>UI: Return optimization results
+
+ UI->>User: Display optimized prompt and performance metrics
+
+ Note over User: Can export or copy optimized prompt
diff --git a/images/video_thumbnail.gif b/images/video_thumbnail.gif
new file mode 100644
index 00000000..52ae20aa
Binary files /dev/null and b/images/video_thumbnail.gif differ
diff --git a/images/video_thumbnail.png b/images/video_thumbnail.png
new file mode 100644
index 00000000..a65abfa0
Binary files /dev/null and b/images/video_thumbnail.png differ
diff --git a/install.bat b/install.bat
new file mode 100644
index 00000000..2f70828f
--- /dev/null
+++ b/install.bat
@@ -0,0 +1,15 @@
+@echo off
+echo Installing PromptWizard UI dependencies...
+
+echo Installing backend dependencies...
+cd api
+pip install -r requirements.txt
+cd ..
+
+echo Installing frontend dependencies...
+cd ui
+npm install
+cd ..
+
+echo Installation complete!
+echo Run 'start.bat' to start the application.
diff --git a/my_project_anonymizer/.env b/my_project_anonymizer/.env
new file mode 100644
index 00000000..139a64d3
--- /dev/null
+++ b/my_project_anonymizer/.env
@@ -0,0 +1,4 @@
+# Gemini configuration loaded by python-dotenv
+
+MODEL_TYPE="Gemini"
+GOOGLE_API_KEY="your-gemini-api-key-here"
diff --git a/my_project_anonymizer/configs/promptopt_config.yaml b/my_project_anonymizer/configs/promptopt_config.yaml
new file mode 100644
index 00000000..7215b226
--- /dev/null
+++ b/my_project_anonymizer/configs/promptopt_config.yaml
@@ -0,0 +1,50 @@
+prompt_technique_name: "critique_n_refine"
+unique_model_id: "gemini-2.0-flash"
+mutate_refine_iterations: 3
+mutation_rounds: 3
+refine_instruction: true
+refine_task_eg_iterations: 2
+top_n: 3
+min_correct_count: 2
+max_eval_batches: 5
+
+# Task Description
+task_description: "Optimize a prompt for identifying and returning coordinates of sensitive data in images for anonymization purposes"
+
+# Initial base instruction
+base_instruction: |
+ Please provide the coordinates for anonymizing any and all sensitive data (such as names, phone numbers, addresses, signatures, bank account details, faces, etc.) in the provided images.
+ Return the coordinates of rectangles that cover all potential sensitive data in JSON format, structured as follows:
+ {
+ "1st_image_name": {
+ "field1_name": [[x1, y1], [x2, y2]],
+ "field2_name": [[x3, y3], [x4, y4]],
+ ...
+ }
+ }
+ Where:
+ - "field1_name", "field2_name" are descriptive names for the sensitive data fields
+ - [x1, y1] is the top-left coordinate of the rectangle
+ - [x2, y2] is the bottom-right coordinate of the rectangle
+ Ensure you identify ALL sensitive information, including patient data, medical info, and personal identifiers.
+ ONLY return the JSON structure with no extra text.
+
+# Answer format specification
+answer_format: "JSON format containing image names as keys and nested objects with field names and coordinate pairs"
+
+# Evaluation criteria
+evaluation_criteria:
+ - "Completeness in identifying all types of sensitive data"
+ - "Accuracy of JSON structure"
+ - "Clarity of field naming"
+ - "Precision of coordinate specifications"
+ - "Adherence to JSON-only response format"
+
+# Optional features
+use_examples: false
+generate_synthetic_examples: false
+run_without_train_examples: true
+generate_expert_identity: true
+generate_intent_keywords: true
+
+
diff --git a/my_project_anonymizer/configs/setup_config.yaml b/my_project_anonymizer/configs/setup_config.yaml
new file mode 100644
index 00000000..2c341eda
--- /dev/null
+++ b/my_project_anonymizer/configs/setup_config.yaml
@@ -0,0 +1,12 @@
+llm:
+ model_type: "Gemini"
+ model_name: "gemini-2.0-flash"
+ temperature: 0.0
+ max_tokens: 1024
+dir_info:
+ base_dir: logs
+ log_dir_name: glue_logs
+experiment_name: my_task
+mode: offline
+description: "My custom prompt optimization task"
+
diff --git a/my_project_anonymizer/run_optimization.py b/my_project_anonymizer/run_optimization.py
new file mode 100644
index 00000000..e7674937
--- /dev/null
+++ b/my_project_anonymizer/run_optimization.py
@@ -0,0 +1,110 @@
+import os
+from dotenv import load_dotenv
+import yaml
+import google.generativeai as genai
+from dataclasses import dataclass
+
+# Load environment variables from the .env file and the system environment
+load_dotenv()
+
+@dataclass
+class PromptOptimizationParams:
+ task_description: str
+ base_instruction: str
+ answer_format: str
+ max_iterations: int
+ evaluation_criteria: list
+ temperature: float
+
+class PromptOptimizer:
+ def __init__(self, setup_config, task_description):
+ self.setup_config = setup_config
+ self.task_description = task_description
+
+ # Initialize Gemini
+        api_key = os.environ.get("GOOGLE_API_KEY")
+        if not api_key or api_key == "your-gemini-api-key-here":
+ print("\nERROR: Valid GOOGLE_API_KEY not found in system environment variables.")
+ print("Please set a valid Gemini API key in your system environment.")
+ print("You can get an API key from https://ai.google.dev/")
+ print("\nFor testing purposes, we'll continue with a mock optimization.")
+ self.use_mock = True
+ else:
+ self.use_mock = False
+ genai.configure(api_key=api_key)
+ self.model = genai.GenerativeModel('gemini-2.0-flash')
+
+ def optimize_prompt(self, base_prompt, criteria):
+ if self.use_mock:
+ # Return a mock optimized prompt for testing
+ print(f"\nMock optimization iteration:")
+ print(f"Base prompt: {base_prompt[:50]}...")
+ print(f"Criteria: {', '.join(criteria)}")
+
+ # Create a simple mock optimization by adding some text
+ optimized = base_prompt + "\n\nAdditional instructions: Please ensure all sensitive data is properly identified and coordinates are precise to the pixel level."
+ return optimized
+ else:
+ # Use the actual Gemini model
+ chat = self.model.start_chat(history=[])
+
+ optimization_prompt = f"""
+ Task: {self.task_description}
+ Base prompt: {base_prompt}
+ Evaluation criteria: {', '.join(criteria)}
+
+ Please optimize this prompt to better meet the evaluation criteria.
+ Return only the optimized prompt without any explanations.
+ """
+
+ response = chat.send_message(optimization_prompt)
+ return response.text
+
+ def get_best_prompt(self, params):
+ current_prompt = params.base_instruction
+
+ print(f"\nStarting prompt optimization with {params.max_iterations} iterations")
+ print(f"Task: {params.task_description}")
+
+ for i in range(params.max_iterations):
+ print(f"\nIteration {i+1}/{params.max_iterations}:")
+ optimized_prompt = self.optimize_prompt(
+ current_prompt,
+ params.evaluation_criteria
+ )
+ current_prompt = optimized_prompt
+
+ return current_prompt, None
+
+def main():
+ # Load configurations
+ with open('configs/promptopt_config.yaml', 'r') as f:
+ prompt_config = yaml.safe_load(f)
+
+ with open('configs/setup_config.yaml', 'r') as f:
+ setup_config = yaml.safe_load(f)
+
+ # Extract only the needed parameters from the config
+ params = PromptOptimizationParams(
+ task_description=prompt_config['task_description'],
+ base_instruction=prompt_config['base_instruction'],
+ answer_format=prompt_config['answer_format'],
+ evaluation_criteria=prompt_config['evaluation_criteria'],
+ max_iterations=prompt_config.get('mutate_refine_iterations', 3), # Default to 3 if not found
+ temperature=setup_config['llm'].get('temperature', 0.0) # Default to 0.0 if not found
+ )
+
+ # Initialize optimizer
+ optimizer = PromptOptimizer(
+ setup_config=setup_config,
+ task_description=params.task_description
+ )
+
+ # Run optimization
+ best_prompt, _ = optimizer.get_best_prompt(params=params)
+
+ print("\nBest optimized prompt:")
+ print(best_prompt)
+
+if __name__ == "__main__":
+ main()
diff --git a/promptwizard.egg-info/PKG-INFO b/promptwizard.egg-info/PKG-INFO
new file mode 100644
index 00000000..b1292de6
--- /dev/null
+++ b/promptwizard.egg-info/PKG-INFO
@@ -0,0 +1,331 @@
+Metadata-Version: 2.4
+Name: promptwizard
+Version: 0.2.2
+Summary: Optimize Prompt
+Home-page: https://github.com/microsoft/PromptWizard
+Author: The PromptWizard team
+Author-email: promptwizard@microsoft.com
+License: MIT License
+Keywords: PromptWizard
+Classifier: Intended Audience :: Science/Research
+Classifier: Development Status :: 3 - Alpha
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.8.0
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: datasets
+Requires-Dist: tiktoken
+Requires-Dist: nltk
+Requires-Dist: openai
+Requires-Dist: azure-identity
+Requires-Dist: azure-search-documents
+Requires-Dist: pyyaml~=6.0.1
+Requires-Dist: pyarrow==15.0.2
+Requires-Dist: llama-index==0.11.10
+Requires-Dist: llama-index-core==0.11.10
+Requires-Dist: python-dotenv
+Provides-Extra: dev
+Requires-Dist: datasets; extra == "dev"
+Requires-Dist: tiktoken; extra == "dev"
+Requires-Dist: nltk; extra == "dev"
+Requires-Dist: openai; extra == "dev"
+Requires-Dist: azure-identity; extra == "dev"
+Requires-Dist: azure-search-documents; extra == "dev"
+Requires-Dist: pyyaml~=6.0.1; extra == "dev"
+Requires-Dist: pyarrow==15.0.2; extra == "dev"
+Requires-Dist: llama-index==0.11.10; extra == "dev"
+Requires-Dist: llama-index-core==0.11.10; extra == "dev"
+Requires-Dist: python-dotenv; extra == "dev"
+Requires-Dist: black==21.4b0; extra == "dev"
+Requires-Dist: flake8>=3.8.3; extra == "dev"
+Requires-Dist: isort>=5.5.4; extra == "dev"
+Requires-Dist: pre-commit; extra == "dev"
+Requires-Dist: pytest; extra == "dev"
+Requires-Dist: pytest-xdist; extra == "dev"
+Provides-Extra: quality
+Requires-Dist: black==21.4b0; extra == "quality"
+Requires-Dist: flake8>=3.8.3; extra == "quality"
+Requires-Dist: isort>=5.5.4; extra == "quality"
+Requires-Dist: pre-commit; extra == "quality"
+Requires-Dist: pytest; extra == "quality"
+Requires-Dist: pytest-xdist; extra == "quality"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: keywords
+Dynamic: license
+Dynamic: license-file
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+
+
+# PromptWizard 🧙
+
+
+
+
+PromptWizard is a discrete prompt optimization framework that employs a self-evolving mechanism where the LLM generates, critiques, and refines its own prompts and examples, continuously improving through iterative feedback and synthesis. This self-adaptive approach ensures holistic optimization by evolving both the instructions and in-context learning examples for better task performance.
+
+The three key components of PromptWizard are the following:
+
+- Feedback-driven Refinement: LLM generates, critiques, and refines its own prompts and examples, continuously improving through iterative feedback and synthesis
+- Critique and Synthesize diverse examples: Generates synthetic examples that are robust, diverse and task-aware. It also optimizes both the prompt and the examples in tandem
+- Self-generated Chain of Thought (CoT) steps with a combination of positive, negative and synthetic examples
+
+
+Stage 1: Iterative optimization of instructions
+
+
+
+
+
+Stage 2: Sequential optimization of instruction and examples
+
+
+
+
+## Installation ⬇️
+
+Follow these steps to set up the development environment and install the package:
+
+1) Clone the repository
+ ```
+ git clone https://github.com/microsoft/PromptWizard
+ cd PromptWizard
+ ```
+2) Create and activate a virtual environment
+
+ On Windows
+ ```
+ python -m venv venv
+ venv\Scripts\activate
+ ```
+ On macOS/Linux:
+ ```
+ python -m venv venv
+ source venv/bin/activate
+ ```
+3) Install the package in development mode:
+ ```
+ pip install -e .
+ ```
+
+
+## Quickstart 🏃
+
+There are three main ways to use PromptWizard:
+- Scenario 1 : Optimizing prompts without examples
+- Scenario 2 : Generating synthetic examples and using them to optimize prompts
+- Scenario 3 : Optimizing prompts with training data
+
+**NOTE** : Refer to this [notebook](demos/scenarios/dataset_scenarios_demo.ipynb) to get a detailed understanding of the usage for each of the scenarios. **This serves as a starting point to understand the usage of PromptWizard**
+
+#### High level overview of using PromptWizard
+- Decide your scenario
+- Fix the configuration and environment variables for API calling
+ - Use ```promptopt_config.yaml``` to set configurations. For example for GSM8k this [file](demos/gsm8k/configs/promptopt_config.yaml) can be used
+  - Use ```.env``` to set environment variables. For GSM8k this [file](demos/gsm8k/.env) can be used
+ ```
+ USE_OPENAI_API_KEY="XXXX"
+ # Replace with True/False based on whether or not to use OPENAI API key
+
+ # If the first variable is set to True then fill the following two
+ OPENAI_API_KEY="XXXX"
+ OPENAI_MODEL_NAME ="XXXX"
+
+ # If the first variable is set to False then fill the following three
+ AZURE_OPENAI_ENDPOINT="XXXXX"
+ # Replace with your Azure OpenAI Endpoint
+
+ OPENAI_API_VERSION="XXXX"
+ # Replace with the version of your API
+
+ AZURE_OPENAI_CHAT_DEPLOYMENT_NAME="XXXXX"
+ # Create a deployment for the model and place the deployment name here.
+ ```
+- Run the code
+ - To run PromptWizard on your custom dataset please jump [here](#run-on-custom-dataset)
+
+#### Running PromptWizard with training data (Scenario 3)
+- We support [GSM8k](https://huggingface.co/datasets/openai/gsm8k), [SVAMP](https://huggingface.co/datasets/ChilleD/SVAMP), [AQUARAT](https://huggingface.co/datasets/deepmind/aqua_rat) and [Instruction_Induction(BBII)](https://github.com/xqlin98/INSTINCT/tree/main/Induction/experiments/data/instruction_induction/raw) datasets
+- Please note that the time taken for prompt optimization depends on the dataset. In our experiments on the above-mentioned datasets, it took around 20 - 30 minutes on average.
+
+#### Running on GSM8k (AQUARAT/SVAMP)
+
+- Please note that this code requires access to LLMs via API calling for which we support AZURE endpoints or OPENAI keys
+- Set the AZURE endpoint configurations in [.env](demos/gsm8k/.env)
+- Follow the steps in [demo.ipynb](demos/gsm8k/demo.ipynb) to download the data, run the prompt optimization and carry out inference.
+
+#### Running on BBII
+
+- BBII has many datasets in it, based on the dataset set the configs [here](demos/bbh/configs/promptopt_config.yaml)
+- In the configs, ```task_description```, ```base_instruction``` and ```answer_format``` need to be changed for the different datasets in BBII; the rest of the configs remain the same
+- A demo is presented in [demo.ipynb](demos/bbh/demo.ipynb)
+
+
+
+## Run on Custom Datasets 🗃️
+
+### Create Custom Dataset
+- Our code expects the dataset to be in ```.jsonl``` file format
+- Both the train and test set follow the same format
+- Every sample in the ```.jsonl``` should have 2 fields :
+  1) ```question``` : It should contain the complete question that is to be asked to the LLM
+  2) ```answer``` : It should contain the ground truth answer, which can be verbose or concise (see the example line below)
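+
+For example, a single line of the ```.jsonl``` file could look like this (the values are only illustrative):
+```
+{"question": "A pen costs 3 dollars and a notebook costs 5 dollars. How much do 2 pens and 1 notebook cost?", "answer": "11"}
+```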
+
+
+### Run on Custom Dataset
+
+NOTE : Refer to the [demos](demos) folder for example folders for four datasets. The ```.ipynb``` in each of the folders shows how to run PromptWizard on that particular dataset. A similar procedure can be followed for a new dataset. Below is an explanation of each component of the ```.ipynb``` and the dataset-specific folder structure in detail
+
+#### Steps to be followed for custom datasets
+
+1) Every new dataset needs to have the following
+ - ```configs``` folder to store files for defining optimization hyperparameters and setup configs
+ - ```data``` folder to store ```train.jsonl``` and ```test.jsonl``` as curated [here](#create-custom-dataset) (this is done in the notebooks)
+    - ```.env``` file for environment variables to be used for API calling
+ - ```.py/.ipynb``` script to run the code
+
+2) Set the hyperparameters like number of mutations, refine steps, in-context examples etc.
+ - Set the following in [promptopt_config.yaml](demos/gsm8k/configs/promptopt_config.yaml) :
+      - ```task_description``` : Description of the task at hand which will be fed into the prompt
+ - For GSM8k a description like the following can be used
+ ```
+ You are a mathematics expert. You will be given a mathematics problem which you need to solve
+ ```
+ - ```base_instruction``` : Base instruction in line with the dataset
+ - A commonly used base instruction could be
+ ```
+ Lets think step by step.
+ ```
+ - ```answer_format``` : Instruction for specifying the answer format
+ - It is crucial to set the ```answer_format``` properly to ensure correct extraction by ```def extract_final_answer()```
+ - Answer format could be :
+ ```
+      At the end, wrap only your final option between <ANS_START> and <ANS_END> tags
+ ```
+ Then in ```def extract_final_answer()``` we can simply write code to extract string between the tags
+
+ - ```seen_set_size``` : The number of train samples to be used for prompt optimization
+ - In our experiments we set this to be 25. In general any number between 20-50 would work
+ - ```few_shot_count``` : The number of in-context examples needed in the prompt
+ - The value can be set to any positive integer based on the requirement
+        - For generating zero-shot prompts, set the value to a small number (i.e. between 2-5) and remove the in-context examples after the final prompt is generated. We suggest using some in-context examples, as the instructions in the prompt are refined using in-context examples during the optimization process; setting it to a small number will therefore give better zero-shot instructions in the prompt
+ - ```generate_reasoning``` : Whether or not to generate reasoning for the in-context examples
+ - In our experiments we found it to improve the prompt overall as it provides a step-by-step approach to reach the final answer. However if there is a constraint on the prompt length or number of prompt tokens, it can be turned off to get smaller sized prompts
+      - ```generate_expert_identity``` and ```generate_intent_keywords``` : Enabling these helped improve the prompt, as they make the prompt more relevant to the task
+      - Refer to the ```promptopt_config.yaml``` files in the folders present [here](demos) for the descriptions used for AQUARAT, SVAMP and GSM8k. For BBII, refer to [description.py](demos/bbh/description.py), which has the meta instructions for each of the datasets
+ - Following are the global parameters which can be set based on the availability of the training data
+ - ```run_without_train_examples``` is a global hyperparameter which can be used when there are no training samples and in-context examples are not required in the final prompt
+ - ```generate_synthetic_examples``` is a global hyperparameter which can be used when there are no training samples and we want to generate synthetic data for training
+ - ```use_examples``` is a global hyperparameter which can be used to optimize prompts using training data
+3) Create a dataset-specific class which inherits ```class DatasetSpecificProcessing```, similar to ```GSM8k(DatasetSpecificProcessing)``` in [demo.ipynb](demos/gsm8k/demo.ipynb), and define the following functions in it (a minimal sketch is shown after this list)
+   1) ```def extract_answer_from_output()``` : This is a dataset-specific function: given the ```answer``` from the dataset, it should extract and return a concise form of the answer. Note that based on the dataset it can also simply return the ```answer``` as it is, as in the case of the SVAMP and AQUARAT datasets
+   2) ```def extract_final_answer()``` : This is an LLM-output-specific function: given the verbose answer from the LLM, it should extract and return the concise final answer
+   3) Define ```def access_answer()``` : This function takes as input the LLM output, then does the following:
+ - Extracts the concise answer using ```def extract_final_answer()``` from the LLM output as defined above
+      - Evaluates the extracted answer against the ground truth and returns
+ - Extracted answer from LLM output
+ - Boolean value indicating if answer is correct or not
+      - The evaluation done here is dataset specific. For datasets like GSM8k, SVAMP and AQUARAT, which have a number as the final answer, we can do a direct match between the generated number and the ground truth, while for datasets where the answer is a sentence or paragraph it is better to evaluate with an LLM-as-a-judge that compares the generated and ground-truth paragraph/sentence. An example is available in ```def access_answer()``` in [this](demos/bbh/demo.ipynb) notebook
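+
+A minimal sketch of such a class is shown below. It assumes a dataset with purely numeric final answers; the class name and the regex heuristic are illustrative only, and the import path may need to be adjusted to match the demo notebooks rather than being taken as the exact implementation.
+
+```
+import re
+from promptwizard.glue.promptopt.techniques.common_logic import DatasetSpecificProcessing
+
+class MyNumericDataset(DatasetSpecificProcessing):
+    def extract_answer_from_output(self, answer):
+        # Ground-truth answers are assumed to already be concise numbers
+        return answer.strip()
+
+    def extract_final_answer(self, llm_output):
+        # Take the last number in the verbose LLM output as the final answer (illustrative heuristic)
+        numbers = re.findall(r"-?\d+\.?\d*", llm_output)
+        return numbers[-1] if numbers else ""
+
+    def access_answer(self, llm_output, gt_answer):
+        # Returns the extracted answer and a boolean indicating correctness, as described above
+        predicted = self.extract_final_answer(llm_output)
+        is_correct = predicted != "" and predicted == self.extract_answer_from_output(gt_answer)
+        return predicted, is_correct
+```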
+
+
+## How PromptWizard Works 🔍
+- Using the problem description and initial prompt instruction, PW generates variations of the instruction by prompting LLMs to mutate it. Based on performance, the best prompt is selected. PW incorporates a critique component that provides feedback, thus guiding and refining the prompt over multiple iterations.
+- PW also optimizes in-context examples. PW selects a diverse set of examples
+from the training data, identifying positive and negative examples based on their performance with
+the modified prompt. Negative examples help inform further prompt refinements.
+- Examples and instructions are sequentially optimized, using the critique to generate synthetic examples that address the current prompt’s weaknesses. These examples are integrated to further refine the prompt.
+- PW generates detailed reasoning chains via Chain-of-Thought (CoT), enriching the prompt’s capacity for problem-solving.
+- PW aligns prompts with human reasoning by integrating task intent and expert
+personas, enhancing both model performance and interpretability.
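+
+As a rough illustration of this feedback-driven loop (a simplified sketch only, not the actual PromptWizard implementation; ```mutate```, ```score```, ```critique``` and ```refine``` are placeholders for the LLM calls described above):
+
+```
+def optimize(instruction, examples, rounds, mutate, score, critique, refine):
+    """Conceptual sketch of one mutate-select-critique-refine cycle."""
+    best = instruction
+    for _ in range(rounds):
+        candidates = mutate(best)                                  # generate instruction variations
+        best = max(candidates, key=lambda c: score(c, examples))   # keep the best-scoring variant
+        feedback = critique(best, examples)                        # critique the failures on examples
+        best = refine(best, feedback)                              # refine the instruction using the critique
+    return best
+```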
+
+## Configurations ⚙️
+
+Here we define the various hyperparameters used in the prompt optimization process, as found in [promptopt_config.yaml](demos/gsm8k/configs/promptopt_config.yaml); an illustrative snippet follows the list below
+
+- ```mutate_refine_iterations```: Number of iterations for conducting mutation of task description
+ followed by refinement of instructions
+- ```mutation_rounds```: Number of rounds of mutation to be performed when generating different styles
+- ```refine_task_eg_iterations```: Number of iterations for refining task description and in context examples
+- ```style_variation```: Number of thinking style variations to be used in prompt mutation
+- ```questions_batch_size```: Number of questions to be asked to LLM in a single batch, during training step
+- ```min_correct_count```: Minimum number of question batches that must be answered correctly for a prompt to be considered as performing well
+- ```max_eval_batches```: Maximum number of mini-batches on which we should evaluate the prompt
+- ```top_n```: Number of top best prompts to be considered from scoring stage for the next stage
+- ```seen_set_size```: Number of samples from trainset to be used for training
+- ```few_shot_count```: Number of in-context examples required in final prompt
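+
+In ```promptopt_config.yaml``` these hyperparameters appear as plain keys, for example (the values shown are only illustrative):
+
+```
+mutate_refine_iterations: 3
+mutation_rounds: 3
+refine_task_eg_iterations: 3
+style_variation: 5
+questions_batch_size: 1
+min_correct_count: 3
+max_eval_batches: 6
+top_n: 1
+seen_set_size: 25
+few_shot_count: 5
+```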
+
+## Best Practices 💡
+
+Following are some of the best practices we followed during our experiments
+- Regarding the parameters in [promptopt_config.yaml](demos/gsm8k/configs/promptopt_config.yaml)
+ - We found the best performing values for ```mutate_refine_iterations```,```mutation_rounds```,```refine_task_eg_iterations``` to be 3 or 5
+ - Other parameters have been set to their ideal values. ```seen_set_size``` can be increased to 50 and ```few_shot_count``` can be set based on the use case
+- The prompts generated at the end of the training process are usually very detailed, however user supervision can help tune it further for the task at hand
+- Both configurations, synthetic in-context examples and in-context examples from the train set, can be tried to find the best prompt for the use case.
+
+## Results 📈
+
+
+
+
+PromptWizard consistently outperforms other methods across various
+thresholds, maintaining the highest p(τ) values, indicating that it consistently performs near the best
+possible accuracy across all tasks
+
+
+
+- The figure shows the performance profile curve for the instruction induction
+tasks. The performance profile curve visualizes how frequently
+different approaches’ performance is within a given distance of the best performance. In this curve,
+the x-axis (τ) represents the performance ratio relative to the best-performing method, and the y-axis
+(p(τ)) reflects the fraction of tasks where a method’s performance is within this ratio. So for a given
+method, the curve tells what percentage of the tasks are within τ distance to the best performance.
+
+
+## How to contribute: ✋
+This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.microsoft.com.
+When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repositories using our CLA.
+This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact opencode@microsoft.com with any additional questions or comments.
+
+## Citation 📝
+
+If you make use of our work, please cite our paper:
+
+```
+@misc{agarwal2024promptwizardtaskawarepromptoptimization,
+ title={PromptWizard: Task-Aware Prompt Optimization Framework},
+ author={Eshaan Agarwal and Joykirat Singh and Vivek Dani and Raghav Magazine and Tanuja Ganu and Akshay Nambi},
+ year={2024},
+ eprint={2405.18369},
+ archivePrefix={arXiv},
+ primaryClass={cs.CL},
+ url={https://arxiv.org/abs/2405.18369},
+}
+```
+## Responsible AI Considerations
+For guidelines and best practices related to Responsible AI, please refer to our [Responsible AI Guidelines](RESPONSIBLE_AI.md).
+
diff --git a/promptwizard.egg-info/SOURCES.txt b/promptwizard.egg-info/SOURCES.txt
new file mode 100644
index 00000000..a9285b14
--- /dev/null
+++ b/promptwizard.egg-info/SOURCES.txt
@@ -0,0 +1,13 @@
+LICENSE
+README.md
+pyproject.toml
+setup.cfg
+setup.py
+./promptwizard/__init__.py
+./promptwizard/version.py
+promptwizard.egg-info/PKG-INFO
+promptwizard.egg-info/SOURCES.txt
+promptwizard.egg-info/dependency_links.txt
+promptwizard.egg-info/not-zip-safe
+promptwizard.egg-info/requires.txt
+promptwizard.egg-info/top_level.txt
\ No newline at end of file
diff --git a/promptwizard.egg-info/dependency_links.txt b/promptwizard.egg-info/dependency_links.txt
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/promptwizard.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/promptwizard.egg-info/not-zip-safe b/promptwizard.egg-info/not-zip-safe
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/promptwizard.egg-info/not-zip-safe
@@ -0,0 +1 @@
+
diff --git a/promptwizard.egg-info/requires.txt b/promptwizard.egg-info/requires.txt
new file mode 100644
index 00000000..3fc7c4d0
--- /dev/null
+++ b/promptwizard.egg-info/requires.txt
@@ -0,0 +1,38 @@
+datasets
+tiktoken
+nltk
+openai
+azure-identity
+azure-search-documents
+pyyaml~=6.0.1
+pyarrow==15.0.2
+llama-index==0.11.10
+llama-index-core==0.11.10
+python-dotenv
+
+[dev]
+datasets
+tiktoken
+nltk
+openai
+azure-identity
+azure-search-documents
+pyyaml~=6.0.1
+pyarrow==15.0.2
+llama-index==0.11.10
+llama-index-core==0.11.10
+python-dotenv
+black==21.4b0
+flake8>=3.8.3
+isort>=5.5.4
+pre-commit
+pytest
+pytest-xdist
+
+[quality]
+black==21.4b0
+flake8>=3.8.3
+isort>=5.5.4
+pre-commit
+pytest
+pytest-xdist
diff --git a/promptwizard.egg-info/top_level.txt b/promptwizard.egg-info/top_level.txt
new file mode 100644
index 00000000..53c80dec
--- /dev/null
+++ b/promptwizard.egg-info/top_level.txt
@@ -0,0 +1 @@
+promptwizard
diff --git a/promptwizard/glue/common/llm/llm_mgr.py b/promptwizard/glue/common/llm/llm_mgr.py
index c5cec9cd..8a35510f 100644
--- a/promptwizard/glue/common/llm/llm_mgr.py
+++ b/promptwizard/glue/common/llm/llm_mgr.py
@@ -1,69 +1,80 @@
+import os
+import google.generativeai as genai
from typing import Dict
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler
from llama_index.core.llms import ChatMessage
from llama_index.core.llms import LLM
-from tenacity import retry, stop_after_attempt, wait_fixed, wait_random
from ..base_classes import LLMConfig
-from ..constants.str_literals import InstallLibs, OAILiterals, \
- OAILiterals, LLMLiterals, LLMOutputTypes
-from .llm_helper import get_token_counter
+from ..constants.str_literals import InstallLibs, OAILiterals, LLMLiterals, LLMOutputTypes
from ..exceptions import GlueLLMException
-from ..utils.runtime_tasks import install_lib_if_missing
+from ..utils.runtime_tasks import install_lib_if_missing, str_to_class
from ..utils.logging import get_glue_logger
-from ..utils.runtime_tasks import str_to_class
-import os
-logger = get_glue_logger(__name__)
-def call_api(messages):
+logger = get_glue_logger(__name__)
+def call_openai_api(messages):
from openai import OpenAI
from azure.identity import get_bearer_token_provider, AzureCliCredential
from openai import AzureOpenAI
- if os.environ['USE_OPENAI_API_KEY'] == "True":
+ if os.environ.get('USE_OPENAI_API_KEY') == "True":
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
response = client.chat.completions.create(
- model=os.environ["OPENAI_MODEL_NAME"],
- messages=messages,
- temperature=0.0,
+ model=os.environ["OPENAI_MODEL_NAME"],
+ messages=messages,
+ temperature=0.0,
)
else:
token_provider = get_bearer_token_provider(
- AzureCliCredential(), "https://cognitiveservices.azure.com/.default"
- )
+ AzureCliCredential(), "https://cognitiveservices.azure.com/.default"
+ )
client = AzureOpenAI(
api_version=os.environ["OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
azure_ad_token_provider=token_provider
- )
+ )
response = client.chat.completions.create(
model=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
messages=messages,
temperature=0.0,
)
- prediction = response.choices[0].message.content
- return prediction
-
+ return response.choices[0].message.content
+
+def call_gemini_api(messages):
+ try:
+ api_key = os.getenv("GOOGLE_API_KEY")
+ if not api_key:
+ raise GlueLLMException("GOOGLE_API_KEY environment variable not set")
+
+ genai.configure(api_key=api_key)
+ model = genai.GenerativeModel('gemini-2.0-flash')
+ chat = model.start_chat(history=[])
+
+        # Send each message in turn so the conversation history is preserved
+        response = None
+        for message in messages:
+            if message["role"] in ["system", "user", "assistant"]:
+                response = chat.send_message(message["content"])
+
+        if response is None:
+            raise GlueLLMException("No valid messages to send to Gemini")
+        return response.text
+ except Exception as e:
+ logger.error(f"Error in Gemini API call: {str(e)}")
+ raise GlueLLMException("Failed to get response from Gemini", e)
class LLMMgr:
@staticmethod
def chat_completion(messages: Dict):
- llm_handle = os.environ.get("MODEL_TYPE", "AzureOpenAI")
+ llm_handle = os.getenv("MODEL_TYPE", "AzureOpenAI")
try:
- if(llm_handle == "AzureOpenAI"):
- # Code to for calling LLMs
- return call_api(messages)
- elif(llm_handle == "LLamaAML"):
- # Code to for calling SLMs
- return 0
+ if llm_handle == "AzureOpenAI":
+ return call_openai_api(messages)
+ elif llm_handle == "Gemini":
+ return call_gemini_api(messages)
+ else:
+ raise GlueLLMException(f"Unsupported model type: {llm_handle}")
except Exception as e:
- print(e)
+ logger.error(f"Error in chat completion: {str(e)}")
return "Sorry, I am not able to understand your query. Please try again."
- # raise GlueLLMException(f"Exception when calling {llm_handle.__class__.__name__} "
- # f"LLM in chat mode, with message {messages} ", e)
-
@staticmethod
def get_all_model_ids_of_type(llm_config: LLMConfig, llm_output_type: str):
@@ -88,25 +99,25 @@ def get_llm_pool(llm_config: LLMConfig) -> Dict[str, LLM]:
which can be used as handle to that LLM
"""
llm_pool = {}
- az_llm_config = llm_config.azure_open_ai
-
- if az_llm_config:
+
+        # Handle Azure OpenAI configuration
+        az_llm_config = llm_config.azure_open_ai
+        if az_llm_config:
install_lib_if_missing(InstallLibs.LLAMA_LLM_AZ_OAI)
install_lib_if_missing(InstallLibs.LLAMA_EMB_AZ_OAI)
install_lib_if_missing(InstallLibs.LLAMA_MM_LLM_AZ_OAI)
install_lib_if_missing(InstallLibs.TIKTOKEN)
import tiktoken
- # from llama_index.llms.azure_openai import AzureOpenAI
from openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.multi_modal_llms.azure_openai import AzureOpenAIMultiModal
az_token_provider = None
- # if az_llm_config.use_azure_ad:
from azure.identity import get_bearer_token_provider, AzureCliCredential
- az_token_provider = get_bearer_token_provider(AzureCliCredential(),
- "https://cognitiveservices.azure.com/.default")
+ az_token_provider = get_bearer_token_provider(
+ AzureCliCredential(),
+ "https://cognitiveservices.azure.com/.default"
+ )
for azure_oai_model in az_llm_config.azure_oai_models:
callback_mgr = None
@@ -158,22 +169,61 @@ def get_llm_pool(llm_config: LLMConfig) -> Dict[str, LLM]:
max_new_tokens=4096
)
+ # Handle Gemini configuration
+ if hasattr(llm_config, 'gemini') and llm_config.gemini:
+ try:
+ install_lib_if_missing("google-generativeai>=0.3.0")
+ from llama_index.llms.gemini import Gemini
+ from llama_index.multi_modal_llms.gemini import GeminiMultiModal
+
+ api_key = os.getenv("GOOGLE_API_KEY")
+ if not api_key:
+ raise GlueLLMException("GOOGLE_API_KEY environment variable not set")
+
+ # Configure Gemini
+ gemini_config = llm_config.gemini
+ for gemini_model in gemini_config.models:
+ if gemini_model.model_type == LLMOutputTypes.CHAT:
+ llm_pool[gemini_model.unique_model_id] = Gemini(
+ api_key=api_key,
+ model_name=gemini_model.model_name,
+ temperature=gemini_config.temperature or 0.0,
+ max_tokens=gemini_config.max_tokens,
+ )
+ elif gemini_model.model_type == LLMOutputTypes.MULTI_MODAL:
+ llm_pool[gemini_model.unique_model_id] = GeminiMultiModal(
+ api_key=api_key,
+ model_name=gemini_model.model_name,
+ temperature=gemini_config.temperature or 0.0,
+ max_tokens=gemini_config.max_tokens,
+ )
+ except Exception as e:
+ logger.error(f"Failed to initialize Gemini models: {str(e)}")
+ raise GlueLLMException("Failed to initialize Gemini models", e)
+
+ # Handle custom models
if llm_config.custom_models:
for custom_model in llm_config.custom_models:
- # try:
- custom_llm_class = str_to_class(custom_model.class_name, None, custom_model.path_to_py_file)
-
- callback_mgr = None
- if custom_model.track_tokens:
- # If we need to count number of tokens used in LLM calls
- token_counter = TokenCountingHandler(
- tokenizer=custom_llm_class.get_tokenizer()
+ try:
+ custom_llm_class = str_to_class(
+ custom_model.class_name,
+ None,
+ custom_model.path_to_py_file
+ )
+ callback_mgr = None
+ if custom_model.track_tokens:
+ token_counter = TokenCountingHandler(
+ tokenizer=custom_llm_class.get_tokenizer()
)
- callback_mgr = CallbackManager([token_counter])
- token_counter.reset_counts()
- llm_pool[custom_model.unique_model_id] = custom_llm_class(callback_manager=callback_mgr)
- # except Exception as e:
- # raise GlueLLMException(f"Custom model {custom_model.unique_model_id} not loaded.", e)
+ callback_mgr = CallbackManager([token_counter])
+ token_counter.reset_counts()
+ llm_pool[custom_model.unique_model_id] = custom_llm_class(
+ callback_manager=callback_mgr
+ )
+ except Exception as e:
+ logger.error(f"Failed to load custom model {custom_model.unique_model_id}: {str(e)}")
+ raise GlueLLMException(f"Custom model {custom_model.unique_model_id} not loaded.", e)
+
return llm_pool
@staticmethod
diff --git a/start.bat b/start.bat
new file mode 100644
index 00000000..8e3a3ef1
--- /dev/null
+++ b/start.bat
@@ -0,0 +1,12 @@
+@echo off
+echo Starting PromptWizard UI...
+
+echo Starting backend API...
+start cmd /k "cd api && python app.py"
+
+echo Starting frontend...
+start cmd /k "cd ui && npm run dev"
+
+echo PromptWizard UI started!
+echo Backend API: http://localhost:5000
+echo Frontend: http://localhost:3000
diff --git a/ui/.gitignore b/ui/.gitignore
new file mode 100644
index 00000000..5ef6a520
--- /dev/null
+++ b/ui/.gitignore
@@ -0,0 +1,41 @@
+# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
+
+# dependencies
+/node_modules
+/.pnp
+.pnp.*
+.yarn/*
+!.yarn/patches
+!.yarn/plugins
+!.yarn/releases
+!.yarn/versions
+
+# testing
+/coverage
+
+# next.js
+/.next/
+/out/
+
+# production
+/build
+
+# misc
+.DS_Store
+*.pem
+
+# debug
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+.pnpm-debug.log*
+
+# env files (can opt-in for committing if needed)
+.env*
+
+# vercel
+.vercel
+
+# typescript
+*.tsbuildinfo
+next-env.d.ts
diff --git a/ui/Dockerfile b/ui/Dockerfile
new file mode 100644
index 00000000..191790fe
--- /dev/null
+++ b/ui/Dockerfile
@@ -0,0 +1,53 @@
+FROM node:18-alpine AS base
+
+# Install dependencies only when needed
+FROM base AS deps
+WORKDIR /app
+
+# Copy package.json and package-lock.json
+COPY package.json package-lock.json* ./
+
+# Install dependencies
+RUN npm ci
+
+# Rebuild the source code only when needed
+FROM base AS builder
+WORKDIR /app
+COPY --from=deps /app/node_modules ./node_modules
+COPY . .
+
+# Set environment variables
+ENV NEXT_TELEMETRY_DISABLED 1
+ENV NODE_ENV production
+
+# Build the application
+RUN npm run build
+
+# Production image, copy all the files and run next
+FROM base AS runner
+WORKDIR /app
+
+ENV NODE_ENV production
+ENV NEXT_TELEMETRY_DISABLED 1
+
+RUN addgroup --system --gid 1001 nodejs
+RUN adduser --system --uid 1001 nextjs
+
+COPY --from=builder /app/public ./public
+
+# Set the correct permission for prerender cache
+RUN mkdir .next
+RUN chown nextjs:nodejs .next
+
+# Automatically leverage output traces to reduce image size
+COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./
+COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static
+
+USER nextjs
+
+EXPOSE 3000
+
+ENV PORT 3000
+ENV HOSTNAME "0.0.0.0"
+
+CMD ["node", "server.js"]
diff --git a/ui/README.md b/ui/README.md
new file mode 100644
index 00000000..a279daae
--- /dev/null
+++ b/ui/README.md
@@ -0,0 +1,134 @@
+# PromptWizard UI 🧙‍♂️✨
+
+A modern, user-friendly web interface for the PromptWizard prompt optimization framework.
+
+
+
+
+
+## Features
+
+
+
+
+PromptWizard UI features and their relationships
+
+
+### Tabbed Interface
+- **Basic Info**: Configure task description, base instruction, answer format, model, and API key
+- **Data Selection**: Choose datasets, configure in-context examples, and preview data
+- **Prompt Configuration**: Select optimization scenarios and configure advanced parameters
+- **Evaluation**: Set evaluation criteria and manage optimization sessions
+
+
+
+
+The workflow between different tabs in the UI
+
+
+### Advanced Features
+- **Advanced Optimization Parameters**: Fine-tune the optimization process with parameters like mutate refine iterations, refine task examples iterations, and more
+- **Advanced Evaluation Metrics**: Use metrics like Faithfulness, Semantic Similarity, Context Relevancy, and more
+- **Dataset Preview**: Visualize and inspect your dataset before optimization
+- **Multimodal Support**: Optimize prompts for image-based tasks with image uploads
+- **Session Management**: Save and load optimization sessions for later use
+
+
+ );
+};
+
+export default Tooltip;
diff --git a/ui/src/utils/exportConfig.js b/ui/src/utils/exportConfig.js
new file mode 100644
index 00000000..99aba715
--- /dev/null
+++ b/ui/src/utils/exportConfig.js
@@ -0,0 +1,53 @@
+/**
+ * Exports the form data as a YAML configuration file
+ * @param {Object} formData - The form data to export
+ */
+export const exportConfigAsYaml = (formData) => {
+ // Convert form data to YAML format
+ const yaml = `prompt_technique_name: "critique_n_refine"
+unique_model_id: "${formData.model === 'Gemini' ? 'gemini-2.0-flash' : formData.model === 'GPT-4' ? 'gpt-4' : 'custom-model'}"
+mutate_refine_iterations: ${formData.mutationRounds}
+mutation_rounds: ${formData.mutationRounds}
+refine_instruction: true
+refine_task_eg_iterations: ${formData.refineSteps}
+top_n: 3
+min_correct_count: 2
+max_eval_batches: 5
+
+# Task Description
+task_description: "${formData.taskDescription}"
+
+# Initial base instruction
+base_instruction: |
+ ${formData.baseInstruction.replace(/\n/g, '\n ')}
+
+# Answer format specification
+answer_format: "${formData.answerFormat}"
+
+# Evaluation criteria
+evaluation_criteria:
+${formData.evaluationCriteria.map(criteria => ` - "${criteria}"`).join('\n')}
+
+# Optional features
+use_examples: ${formData.useExamples}
+generate_synthetic_examples: ${formData.useExamples}
+run_without_train_examples: ${!formData.useExamples}
+generate_expert_identity: true
+generate_intent_keywords: true
+`;
+
+ // Create a blob with the YAML content
+ const blob = new Blob([yaml], { type: 'text/yaml' });
+ const url = URL.createObjectURL(blob);
+
+ // Create a temporary link and trigger download
+ const a = document.createElement('a');
+ a.href = url;
+ a.download = 'promptopt_config.yaml';
+ document.body.appendChild(a);
+ a.click();
+
+ // Clean up
+ document.body.removeChild(a);
+ URL.revokeObjectURL(url);
+};
diff --git a/ui/src/utils/tooltipDefinitions.js b/ui/src/utils/tooltipDefinitions.js
new file mode 100644
index 00000000..68deeb2b
--- /dev/null
+++ b/ui/src/utils/tooltipDefinitions.js
@@ -0,0 +1,93 @@
+/**
+ * Tooltip definitions for UI elements
+ */
+export const tooltipDefinitions = {
+ taskDescription: "A clear description of the task you want the prompt to accomplish. This helps the model understand the context and purpose of the prompt.",
+
+ baseInstruction: "Your initial prompt that needs optimization. This is the starting point that will be refined through the optimization process.",
+
+ answerFormat: "The desired format for the model's response (e.g., JSON, bullet points, paragraph, etc.). Specifying this helps ensure consistent outputs.",
+
+ dataset: {
+ label: "The dataset used for optimization. Select from predefined datasets or use your own custom data.",
+ options: {
+ Custom: "Use your own custom data. You'll need to provide examples in the format specified by the task.",
+ GSM8k: "Grade School Math 8K - A dataset of 8,500 high quality grade school math problems. Use case: Mathematical problem solving with step-by-step reasoning.",
+ SVAMP: "Simple Variations on Arithmetic Math word Problems - A dataset focused on elementary math word problems. Use case: Basic arithmetic reasoning and problem solving.",
+ AQUARAT: "AQuA-RAT - A dataset of algebraic word problems with rationales. Use case: Advanced mathematical reasoning with explanations.",
+ BBII: "Big Bench Instruction Induction - A dataset for testing instruction following capabilities. Use case: General instruction following and task completion."
+ }
+ },
+
+ model: {
+ label: "The language model to use for optimization.",
+ options: {
+ Gemini: "Google's Gemini model, optimized for multimodal tasks and reasoning.",
+ "GPT-4": "OpenAI's GPT-4 model, known for strong instruction following and reasoning capabilities.",
+ Custom: "Use a custom model by providing its API endpoint and parameters."
+ }
+ },
+
+ apiKey: "Your API key for the selected model. This is required to make API calls to the model provider.",
+
+ mutationRounds: "The number of rounds of mutation to be performed when generating different styles. Higher values may lead to better results but take longer to process.",
+
+ refineSteps: "The number of refinement steps after each mutation round. More steps can lead to more polished prompts.",
+
+ mutateRefineIterations: "Number of iterations for conducting mutation rounds followed by refinement of instructions. Higher values lead to more thorough optimization.",
+
+ refineTaskEgIterations: "Number of iterations for refining task description and in-context examples for few-shot learning. Higher values improve example quality.",
+
+ refineInstruction: "Whether to refine instructions after mutation. Enabling this leads to more polished prompts but increases processing time.",
+
+ minCorrectCount: "Number of batches of questions to correctly answer for a prompt to be considered as performing well. Higher values ensure better quality.",
+
+ maxEvalBatches: "Maximum number of mini-batches on which to evaluate the prompt. Higher values provide more thorough evaluation but increase processing time.",
+
+ topN: "Number of top best-performing prompts to be considered for next iterations. Higher values explore more variations but may slow down convergence.",
+
+ questionsBatchSize: "Number of questions to be asked to the LLM in a single batch during training. Higher values speed up training but may reduce quality.",
+
+ useExamples: "Whether to use in-context examples during optimization. Examples help the model understand the task better and can improve results.",
+
+ generateSyntheticExamples: "Generate synthetic examples for training when no training data is available. The model will create examples based on the task description. A dataset can still be used for evaluation.",
+
+ generateExpertIdentity: "Generate a description of an expert who can solve the task. This helps the model adopt the right persona and approach.",
+
+ generateIntentKeywords: "Generate keywords that describe the intent of the task. This helps the model understand the purpose of the prompt.",
+
+ styleVariation: "Number of variations of prompts to generate in each iteration. Higher values explore more diverse approaches.",
+
+ fewShotCount: "Number of examples to include in the prompt for few-shot learning. More examples can improve performance but increase token usage.",
+
+ evaluationCriteria: {
+ label: "Basic criteria used to evaluate and improve the prompt.",
+ options: {
+ Accuracy: "How well the prompt produces factually correct and precise responses.",
+ Clarity: "How clear and unambiguous the prompt is.",
+ Completeness: "How well the prompt covers all aspects of the task.",
+ Relevance: "How relevant the prompt is to the specific task.",
+ Conciseness: "How efficiently the prompt communicates without unnecessary verbosity."
+ }
+ },
+
+ advancedEvaluationMetrics: {
+ label: "Advanced metrics for more comprehensive prompt evaluation.",
+ options: {
+ Faithfulness: "Measures how faithful the generated content is to the source material.",
+ SemanticSimilarity: "Evaluates semantic similarity between generated and reference outputs.",
+ ContextRelevancy: "Assesses how relevant the context is to the query.",
+ HitRate: "Percentage of relevant items retrieved from the total number of relevant items.",
+ MRR: "Mean Reciprocal Rank - evaluates the position of the first relevant item in the results.",
+ NDCG: "Normalized Discounted Cumulative Gain - measures ranking quality with position-based weighting."
+ }
+ },
+
+ showDatasetPreview: "Preview and visualize the dataset before optimization to ensure it meets your requirements.",
+
+ enableMultimodal: "Enable support for multimodal inputs like images. This allows optimizing prompts for image-based tasks.",
+
+ saveSession: "Save the current optimization session for later use. This allows you to continue optimization or compare results.",
+
+ sessionName: "Name for the saved optimization session. Use a descriptive name to easily identify it later."
+};
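+
+// Illustrative usage sketch (the Tooltip import path and its `text` prop are
+// assumptions; the definitions object above is the real export):
+//
+//   import Tooltip from '@/components/Tooltip';
+//   import { tooltipDefinitions } from '@/utils/tooltipDefinitions';
+//
+//   <Tooltip text={tooltipDefinitions.mutationRounds}>
+//     <label>Mutation Rounds</label>
+//   </Tooltip>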
diff --git a/ui/tsconfig.json b/ui/tsconfig.json
new file mode 100644
index 00000000..c1334095
--- /dev/null
+++ b/ui/tsconfig.json
@@ -0,0 +1,27 @@
+{
+ "compilerOptions": {
+ "target": "ES2017",
+ "lib": ["dom", "dom.iterable", "esnext"],
+ "allowJs": true,
+ "skipLibCheck": true,
+ "strict": true,
+ "noEmit": true,
+ "esModuleInterop": true,
+ "module": "esnext",
+ "moduleResolution": "bundler",
+ "resolveJsonModule": true,
+ "isolatedModules": true,
+ "jsx": "preserve",
+ "incremental": true,
+ "plugins": [
+ {
+ "name": "next"
+ }
+ ],
+ "paths": {
+ "@/*": ["./src/*"]
+ }
+ },
+ "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
+ "exclude": ["node_modules"]
+}
diff --git a/vercel.json b/vercel.json
new file mode 100644
index 00000000..679a5bd0
--- /dev/null
+++ b/vercel.json
@@ -0,0 +1,15 @@
+{
+ "version": 2,
+ "builds": [
+ {
+ "src": "ui/package.json",
+ "use": "@vercel/next"
+ }
+ ],
+ "routes": [
+ {
+ "src": "/(.*)",
+ "dest": "ui/$1"
+ }
+ ]
+}