mirror of
https://github.com/microsoft/generative-ai-for-beginners.git
synced 2026-06-05 21:07:14 +08:00
269 lines
7.5 KiB
Plaintext
269 lines
7.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"import torch \n",
|
|
"from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline "
|
|
],
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stderr",
|
|
"text": "2024-08-20 03:12:24.098444: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n2024-08-20 03:12:24.098474: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n"
|
|
}
|
|
],
|
|
"execution_count": 1,
|
|
"metadata": {
|
|
"gather": {
|
|
"logged": 1724123553820
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"torch.random.manual_seed(0) \n",
|
|
"model = AutoModelForCausalLM.from_pretrained( \n",
|
|
" \"../phi-3-instruct\", \n",
|
|
" device_map=\"cuda\", \n",
|
|
" torch_dtype=\"auto\", \n",
|
|
" trust_remote_code=True, \n",
|
|
") "
|
|
],
|
|
"outputs": [
|
|
{
|
|
"output_type": "display_data",
|
|
"data": {
|
|
"text/plain": "Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]",
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"version_major": 2,
|
|
"version_minor": 0,
|
|
"model_id": "c29888138a4f4714b7f65112733da2d2"
|
|
}
|
|
},
|
|
"metadata": {}
|
|
}
|
|
],
|
|
"execution_count": 2,
|
|
"metadata": {
|
|
"jupyter": {
|
|
"source_hidden": false,
|
|
"outputs_hidden": false
|
|
},
|
|
"nteract": {
|
|
"transient": {
|
|
"deleting": false
|
|
}
|
|
},
|
|
"gather": {
|
|
"logged": 1724123630099
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"tokenizer = AutoTokenizer.from_pretrained(\"../phi-3-instruct\") "
|
|
],
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stderr",
|
|
"text": "You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message.\n"
|
|
}
|
|
],
|
|
"execution_count": 3,
|
|
"metadata": {
|
|
"jupyter": {
|
|
"source_hidden": false,
|
|
"outputs_hidden": false
|
|
},
|
|
"nteract": {
|
|
"transient": {
|
|
"deleting": false
|
|
}
|
|
},
|
|
"gather": {
|
|
"logged": 1724123630234
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# messages = [ \n",
|
|
"# {\"role\": \"system\", \"content\": \"你是我的人工智能助手,协助我用中文解答问题\"}, \n",
|
|
"# {\"role\": \"user\", \"content\": \"你能否介绍一下如何提升学习效率吗?\"}, \n",
|
|
"# ] \n",
|
|
"\n",
|
|
"# <|system|>\n",
|
|
"# You are a helpful assistant.<|end|>\n",
|
|
"# <|user|>\n",
|
|
"# Question?<|end|>\n",
|
|
"# <|assistant|>\n",
|
|
"\n",
|
|
"messages = \"<|system|>\\n 你是我的人工智能助手,协助我用中文解答问题.\\n<|end|><|user|>\\n 你知道长沙吗?? \\n<|end|><|assistant|>\"\n"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": 9,
|
|
"metadata": {
|
|
"jupyter": {
|
|
"source_hidden": false,
|
|
"outputs_hidden": false
|
|
},
|
|
"nteract": {
|
|
"transient": {
|
|
"deleting": false
|
|
}
|
|
},
|
|
"gather": {
|
|
"logged": 1724123692174
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"pipe = pipeline( \n",
|
|
" \"text-generation\", \n",
|
|
" model=model, \n",
|
|
" tokenizer=tokenizer, \n",
|
|
") "
|
|
],
|
|
"outputs": [],
|
|
"execution_count": 10,
|
|
"metadata": {
|
|
"jupyter": {
|
|
"source_hidden": false,
|
|
"outputs_hidden": false
|
|
},
|
|
"nteract": {
|
|
"transient": {
|
|
"deleting": false
|
|
}
|
|
},
|
|
"gather": {
|
|
"logged": 1724123693760
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"generation_args = { \n",
|
|
" \"max_new_tokens\": 1024, \n",
|
|
" \"return_full_text\": False, \n",
|
|
" \"temperature\": 0.3, \n",
|
|
" \"do_sample\": False, \n",
|
|
"} "
|
|
],
|
|
"outputs": [],
|
|
"execution_count": 11,
|
|
"metadata": {
|
|
"jupyter": {
|
|
"source_hidden": false,
|
|
"outputs_hidden": false
|
|
},
|
|
"nteract": {
|
|
"transient": {
|
|
"deleting": false
|
|
}
|
|
},
|
|
"gather": {
|
|
"logged": 1724123695747
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"output = pipe(messages, **generation_args) "
|
|
],
|
|
"outputs": [],
|
|
"execution_count": 12,
|
|
"metadata": {
|
|
"jupyter": {
|
|
"source_hidden": false,
|
|
"outputs_hidden": false
|
|
},
|
|
"nteract": {
|
|
"transient": {
|
|
"deleting": false
|
|
}
|
|
},
|
|
"gather": {
|
|
"logged": 1724123704229
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"output[0]['generated_text']"
|
|
],
|
|
"outputs": [
|
|
{
|
|
"output_type": "execute_result",
|
|
"execution_count": 8,
|
|
"data": {
|
|
"text/plain": "' 是的,我知道长沙。长沙是中国南部的一座大城市,位于湖南省中部,是中国的国际性大都市。它拥有丰富的历史和文化,曾是明朝的都城。长沙以其繁华的商业、独特的自然风光和丰富的历史遗迹而闻名,如橘子洲、岳麓山和长沙博物院。此外,长沙也是中国重要的科技和教育中心,包括中国工程院和中国科学院长沙分院。'"
|
|
},
|
|
"metadata": {}
|
|
}
|
|
],
|
|
"execution_count": 8,
|
|
"metadata": {
|
|
"jupyter": {
|
|
"source_hidden": false,
|
|
"outputs_hidden": false
|
|
},
|
|
"nteract": {
|
|
"transient": {
|
|
"deleting": false
|
|
}
|
|
},
|
|
"gather": {
|
|
"logged": 1724123639263
|
|
}
|
|
}
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"name": "python38-azureml",
|
|
"language": "python",
|
|
"display_name": "Python 3.8 - AzureML"
|
|
},
|
|
"language_info": {
|
|
"name": "python",
|
|
"version": "3.9.19",
|
|
"mimetype": "text/x-python",
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"pygments_lexer": "ipython3",
|
|
"nbconvert_exporter": "python",
|
|
"file_extension": ".py"
|
|
},
|
|
"microsoft": {
|
|
"ms_spell_check": {
|
|
"ms_spell_check_language": "en"
|
|
},
|
|
"host": {
|
|
"AzureML": {
|
|
"notebookHasBeenCompleted": true
|
|
}
|
|
}
|
|
},
|
|
"kernel_info": {
|
|
"name": "python38-azureml"
|
|
},
|
|
"nteract": {
|
|
"version": "nteract-front-end@1.0.0"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
} |