diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index e0b936ae..0d18991c 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -51,70 +51,71 @@ models_tokens = { "gemini-1.5-pro-latest": 128000, "models/embedding-001": 2048 }, - "ollama": { "command-r": 12800, - "codellama": 16000, - "dbrx": 32768, - "deepseek-coder:33b": 16000, - "falcon": 2048, - "llama2": 4096, - "llama3": 8192, - "llama3:70b": 8192, - "llama3.1":128000, - "llama3.1:8b": 128000, - "llama3.1:70b": 128000, - "lama3.1:405b": 128000, - "scrapegraph": 8192, - "llava": 4096, - "mixtral:8x22b-instruct": 65536, - "mistral-openorca": 32000, - "nomic-embed-text": 8192, - "nous-hermes2:34b": 4096, - "orca-mini": 2048, - "phi3:3.8b": 12800, - "qwen:0.5b": 32000, - "qwen:1.8b": 32000, - "qwen:4b": 32000, - "qwen:14b": 32000, - "qwen:32b": 32000, - "qwen:72b": 32000, - "qwen:110b": 32000, - "stablelm-zephyr": 8192, - "wizardlm2:8x22b": 65536, - # embedding models - "shaw/dmeta-embedding-zh-small-q4": 8192, - "shaw/dmeta-embedding-zh-q4": 8192, - "chevalblanc/acge_text_embedding": 8192, - "martcreation/dmeta-embedding-zh": 8192, - "snowflake-arctic-embed": 8192, - "mxbai-embed-large": 512 + "ollama": { + "command-r": 12800, + "codellama": 16000, + "dbrx": 32768, + "deepseek-coder:33b": 16000, + "falcon": 2048, + "llama2": 4096, + "llama3": 8192, + "llama3:70b": 8192, + "llama3.1":128000, + "llama3.1:8b": 128000, + "llama3.1:70b": 128000, + "lama3.1:405b": 128000, + "scrapegraph": 8192, + "llava": 4096, + "mixtral:8x22b-instruct": 65536, + "mistral-openorca": 32000, + "nomic-embed-text": 8192, + "nous-hermes2:34b": 4096, + "orca-mini": 2048, + "phi3:3.8b": 12800, + "qwen:0.5b": 32000, + "qwen:1.8b": 32000, + "qwen:4b": 32000, + "qwen:14b": 32000, + "qwen:32b": 32000, + "qwen:72b": 32000, + "qwen:110b": 32000, + "stablelm-zephyr": 8192, + "wizardlm2:8x22b": 65536, + # embedding models + "shaw/dmeta-embedding-zh-small-q4": 
8192, + "shaw/dmeta-embedding-zh-q4": 8192, + "chevalblanc/acge_text_embedding": 8192, + "martcreation/dmeta-embedding-zh": 8192, + "snowflake-arctic-embed": 8192, + "mxbai-embed-large": 512, }, "oneapi": { - "qwen-turbo": 6000 + "qwen-turbo": 6000, }, - "nvidia": { + "nvidia": { "meta/llama3-70b-instruct": 419, "meta/llama3-8b-instruct": 419, "nemotron-4-340b-instruct": 1024, - "databricks/dbrx-instruct": 4096, - "google/codegemma-7b": 8192, - "google/gemma-2b": 2048, - "google/gemma-7b": 8192, - "google/recurrentgemma-2b": 2048, - "meta/codellama-70b": 16384, - "meta/llama2-70b": 4096, - "microsoft/phi-3-mini-128k-instruct": 122880, - "mistralai/mistral-7b-instruct-v0.2": 4096, - "mistralai/mistral-large": 8192, - "mistralai/mixtral-8x22b-instruct-v0.1": 32768, - "mistralai/mixtral-8x7b-instruct-v0.1": 8192, - "snowflake/arctic": 16384, + "databricks/dbrx-instruct": 4096, + "google/codegemma-7b": 8192, + "google/gemma-2b": 2048, + "google/gemma-7b": 8192, + "google/recurrentgemma-2b": 2048, + "meta/codellama-70b": 16384, + "meta/llama2-70b": 4096, + "microsoft/phi-3-mini-128k-instruct": 122880, + "mistralai/mistral-7b-instruct-v0.2": 4096, + "mistralai/mistral-large": 8192, + "mistralai/mixtral-8x22b-instruct-v0.1": 32768, + "mistralai/mixtral-8x7b-instruct-v0.1": 8192, + "snowflake/arctic": 16384, }, "groq": { "llama3-8b-8192": 8192, "llama3-70b-8192": 8192, "mixtral-8x7b-32768": 32768, "gemma-7b-it": 8192, - "claude-3-haiku-20240307'": 8192 + "claude-3-haiku-20240307'": 8192, }, "claude": { "claude_instant": 100000, @@ -125,12 +126,12 @@ models_tokens = { "claude-3-opus-20240229": 200000, "claude-3-sonnet-20240229": 200000, "claude-3-haiku-20240307": 200000, - "claude-3-5-sonnet-20240620": 200000 + "claude-3-5-sonnet-20240620": 200000, }, "google_vertexai": { "gemini-1.5-flash": 128000, "gemini-1.5-pro": 128000, - "gemini-1.0-pro": 128000 + "gemini-1.0-pro": 128000, }, "bedrock": { "anthropic.claude-3-haiku-20240307-v1:0": 200000, @@ -149,7 +150,7 @@ 
models_tokens = { "amazon.titan-embed-text-v1": 8000, "amazon.titan-embed-text-v2:0": 8000, "cohere.embed-english-v3": 512, - "cohere.embed-multilingual-v3": 512 + "cohere.embed-multilingual-v3": 512, }, "mistralai": { "mistral-large-latest": 128000, @@ -159,7 +160,7 @@ models_tokens = { "open-mistral-7b": 32000, "open-mixtral-8x7b": 32000, "open-mixtral-8x22b": 64000, - "open-codestral-mamba": 256000 + "open-codestral-mamba": 256000, }, "hugging_face": { "xai-org/grok-1": 8192, @@ -193,11 +194,11 @@ models_tokens = { "TheBloke/dolphin-2.7-mixtral-8x7b-GGUF": 32768, "deepseek-ai/DeepSeek-V2": 131072, "deepseek-ai/DeepSeek-V2-Chat": 131072, - "claude-3-haiku": 200000 + "claude-3-haiku": 200000, }, "deepseek": { "deepseek-chat": 28672, - "deepseek-coder": 16384 + "deepseek-coder": 16384, }, "ernie": { "ernie-bot-turbo": 4096, @@ -207,16 +208,16 @@ models_tokens = { "ernie-bot-2-base-zh": 4096, "ernie-bot-2-base-en": 4096, "ernie-bot-2-base-en-zh": 4096, - "ernie-bot-2-base-zh-en": 4096 + "ernie-bot-2-base-zh-en": 4096, }, "fireworks": { "llama-v2-7b": 4096, "mixtral-8x7b-instruct": 4096, - "nomic-ai/nomic-embed-text-v1.5": 8192, + "nomic-ai/nomic-embed-text-v1.5": 8192, "llama-3.1-405B-instruct": 131072, "llama-3.1-70B-instruct": 131072, "llama-3.1-8B-instruct": 131072, "mixtral-moe-8x22B-instruct": 65536, - "mixtral-moe-8x7B-instruct": 65536 + "mixtral-moe-8x7B-instruct": 65536, }, }