summarization

parent 62fa3225
{
"cells": [
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"import numpy as np\n",
"import pandas as pd\n",
"import PyPDF2, re, os\n",
"import torch\n",
"from datasets import Dataset\n",
"from transformers import DataCollatorForSeq2Seq\n",
"from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, \\\n",
" Seq2SeqTrainingArguments, Seq2SeqTrainer, pipeline\n",
"warnings.filterwarnings('ignore')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load the pretrained checkpoint together with its matching tokenizer.\n",
"model_card = \"t5-small\"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_card)\n",
"model = AutoModelForSeq2SeqLM.from_pretrained(model_card)\n",
"\n",
"# Use the GPU when one is available; fall back to the CPU instead of\n",
"# crashing on machines without CUDA.\n",
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"model.to(device)\n",
"\n",
"# Pass the model object itself, not the checkpoint-name string: the collator\n",
"# looks for `prepare_decoder_input_ids_from_labels` on this argument to build\n",
"# decoder inputs from the labels, and a plain string does not provide it.\n",
"data_collator = DataCollatorForSeq2Seq(\n",
"    tokenizer=tokenizer,\n",
"    model=model,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def load_data(lesson_dir='data/PPT1Dtaa.xlsx', test_size=0.1, seed=None):\n",
"    \"\"\"Read lesson/summary pairs from an Excel workbook and split them.\n",
"\n",
"    Rows missing either the lesson text or its summary are dropped before\n",
"    the split.\n",
"\n",
"    Args:\n",
"        lesson_dir: Path of the Excel workbook to read. It must contain the\n",
"            columns 'Normal_Tesxt' and 'Summarized_text'.\n",
"        test_size: Passed to `Dataset.train_test_split`; defaults to the\n",
"            previously hard-coded 0.1.\n",
"        seed: Optional seed for the shuffle behind the split. The default\n",
"            None keeps the earlier unseeded behaviour; pass an int to get\n",
"            the same split on every run.\n",
"\n",
"    Returns:\n",
"        A tuple (train_dataset, test_dataset, lessons, summaries): the two\n",
"        `Dataset` splits, each with 'lessons' and 'summaries' columns,\n",
"        followed by the complete, unsplit lists of lesson and summary texts.\n",
"    \"\"\"\n",
"    # Column names exactly as this notebook reads them from the workbook\n",
"    # (note the spelling of the first one).\n",
"    text_columns = ['Normal_Tesxt', 'Summarized_text']\n",
"    df = pd.read_excel(lesson_dir)[text_columns].dropna(subset=text_columns)\n",
"\n",
"    lessons = df['Normal_Tesxt'].tolist()\n",
"    summaries = df['Summarized_text'].tolist()\n",
"\n",
"    dataset = Dataset.from_dict({'lessons': lessons, 'summaries': summaries})\n",
"    splits = dataset.train_test_split(test_size=test_size, seed=seed)\n",
"    return splits['train'], splits['test'], lessons, summaries"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Build the train/test splits and keep the full text lists alongside them.\n",
"(train_dataset, test_dataset, lessons, summaries) = load_data()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Lesson token length: \n",
"count 72.000000\n",
"mean 94.319444\n",
"std 21.845054\n",
"min 35.000000\n",
"25% 79.500000\n",
"50% 93.000000\n",
"75% 109.000000\n",
"max 162.000000\n",
"dtype: float64\n",
"\n",
"Summary token length: \n",
"count 72.000000\n",
"mean 78.125000\n",
"std 24.467246\n",
"min 34.000000\n",
"25% 60.000000\n",
"50% 76.000000\n",
"75% 90.250000\n",
"max 136.000000\n",
"dtype: float64\n"
]
}
],
"source": [
"# Token-count statistics for the lessons and the summaries.\n",
"# No task prefix is added here, so the counts describe the raw texts only.\n",
"lesson_token_lengths = pd.Series(\n",
"    [len(tokenizer.encode(text)) for text in lessons]\n",
")\n",
"summary_token_lengths = pd.Series(\n",
"    [len(tokenizer.encode(text)) for text in summaries]\n",
")\n",
"\n",
"print(f\"Lesson token length: \\n{lesson_token_lengths.describe()}\")\n",
"print(f\"\\nSummary token length: \\n{summary_token_lengths.describe()}\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3f9b7a8718d4402c8304f27488cadae3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Map: 0%| | 0/64 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a3e5bd596a9a4dea9e7c7a908278bad2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Map: 0%| | 0/8 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"prefix = \"summarize: \"\n",
"\n",
"# Truncation limits for the tokenizer. They sit above the longest texts in the\n",
"# token-length analysis output saved in this notebook (162 lesson tokens before\n",
"# the prefix is added, 136 summary tokens) so no example is silently cut\n",
"# short; limits of 160 / 135 would truncate the longest ones.\n",
"MAX_INPUT_LENGTH = 192\n",
"MAX_TARGET_LENGTH = 160\n",
"\n",
"def preprocess_function(examples):\n",
"    \"\"\"Tokenize a batch of lessons and summaries for seq2seq training.\n",
"\n",
"    Args:\n",
"        examples: Batch mapping with a \"lessons\" list and a \"summaries\" list,\n",
"            as supplied by `Dataset.map(..., batched=True)`.\n",
"\n",
"    Returns:\n",
"        The tokenizer output for the prefixed lessons, with the token ids of\n",
"        the summaries stored under \"labels\".\n",
"    \"\"\"\n",
"    inputs = [prefix + doc for doc in examples[\"lessons\"]]\n",
"    model_inputs = tokenizer(inputs, max_length=MAX_INPUT_LENGTH, truncation=True)\n",
"\n",
"    labels = tokenizer(\n",
"        text_target=examples[\"summaries\"],\n",
"        max_length=MAX_TARGET_LENGTH,\n",
"        truncation=True,\n",
"    )\n",
"\n",
"    model_inputs[\"labels\"] = labels[\"input_ids\"]\n",
"    return model_inputs\n",
"\n",
"train_dataset = train_dataset.map(preprocess_function, batched=True)\n",
"test_dataset = test_dataset.map(preprocess_function, batched=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Projects\\private-projects\\SLIIT\\Group\\PP2 Progressing\\Edu Me ML\\PP2\\files/lesson-summarization is already a clone of https://huggingface.co/zuu/lesson-summarization. Make sure you pull the latest changes with `repo.git_pull()`.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fc4f94541cda4fc0bbcf3043e834802b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/12800 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 2.9037, 'learning_rate': 1.9692187500000003e-05, 'epoch': 3.12}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9cef058f6d874881ad0ef7e70e3c250f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.2455663681030273, 'eval_runtime': 0.176, 'eval_samples_per_second': 45.444, 'eval_steps_per_second': 45.444, 'epoch': 3.12}\n",
"{'loss': 2.5914, 'learning_rate': 1.9379687500000003e-05, 'epoch': 6.25}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b2ca146c913a4e509245acce495a7a8f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.1497743129730225, 'eval_runtime': 0.171, 'eval_samples_per_second': 46.773, 'eval_steps_per_second': 46.773, 'epoch': 6.25}\n",
"{'loss': 2.393, 'learning_rate': 1.9067187500000002e-05, 'epoch': 9.38}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4034bfda0ac64542931d270f6262bd07",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.100229024887085, 'eval_runtime': 0.167, 'eval_samples_per_second': 47.893, 'eval_steps_per_second': 47.893, 'epoch': 9.38}\n",
"{'loss': 2.2409, 'learning_rate': 1.87546875e-05, 'epoch': 12.5}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "52c49265b63b48b990a1c7a143977e37",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.0753684043884277, 'eval_runtime': 0.185, 'eval_samples_per_second': 43.233, 'eval_steps_per_second': 43.233, 'epoch': 12.5}\n",
"{'loss': 2.1515, 'learning_rate': 1.84421875e-05, 'epoch': 15.62}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "755d541d3bdf46998587b1c6c46eed99",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.0683367252349854, 'eval_runtime': 0.175, 'eval_samples_per_second': 45.704, 'eval_steps_per_second': 45.704, 'epoch': 15.62}\n",
"{'loss': 2.0633, 'learning_rate': 1.8129687500000003e-05, 'epoch': 18.75}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6850ffb174be4f42b23b6edbda69707f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.0541341304779053, 'eval_runtime': 0.175, 'eval_samples_per_second': 45.704, 'eval_steps_per_second': 45.704, 'epoch': 18.75}\n",
"{'loss': 1.9418, 'learning_rate': 1.7817187500000002e-05, 'epoch': 21.88}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "026e67cc094844d187e783ec8eab072c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.060283660888672, 'eval_runtime': 0.165, 'eval_samples_per_second': 48.474, 'eval_steps_per_second': 48.474, 'epoch': 21.88}\n",
"{'loss': 1.837, 'learning_rate': 1.75046875e-05, 'epoch': 25.0}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "39d38ea8d7f3462792af7f77fee20ad3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.0788185596466064, 'eval_runtime': 0.17, 'eval_samples_per_second': 47.048, 'eval_steps_per_second': 47.048, 'epoch': 25.0}\n",
"{'loss': 1.7715, 'learning_rate': 1.71921875e-05, 'epoch': 28.12}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "56ca81a0b4f94fd1bdbc6c111a6086e9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.0754458904266357, 'eval_runtime': 0.182, 'eval_samples_per_second': 43.946, 'eval_steps_per_second': 43.946, 'epoch': 28.12}\n",
"{'loss': 1.6957, 'learning_rate': 1.6879687500000003e-05, 'epoch': 31.25}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "49fa4a86cb7945e683d1decee0c69e7a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.0815188884735107, 'eval_runtime': 0.166, 'eval_samples_per_second': 48.182, 'eval_steps_per_second': 48.182, 'epoch': 31.25}\n",
"{'loss': 1.6079, 'learning_rate': 1.6568750000000003e-05, 'epoch': 34.38}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7c3aed39b331414784044f7f2b2dd1fc",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.094008207321167, 'eval_runtime': 0.175, 'eval_samples_per_second': 45.704, 'eval_steps_per_second': 45.704, 'epoch': 34.38}\n",
"{'loss': 1.5947, 'learning_rate': 1.6256250000000002e-05, 'epoch': 37.5}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2aa5f7402b7c4e47b2f3b4008cd0f1ea",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.109351634979248, 'eval_runtime': 0.17, 'eval_samples_per_second': 47.048, 'eval_steps_per_second': 47.048, 'epoch': 37.5}\n",
"{'loss': 1.4603, 'learning_rate': 1.594375e-05, 'epoch': 40.62}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f5e250191f394872b0694300aa72195a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.1147055625915527, 'eval_runtime': 0.178, 'eval_samples_per_second': 44.934, 'eval_steps_per_second': 44.934, 'epoch': 40.62}\n",
"{'loss': 1.4621, 'learning_rate': 1.5631250000000004e-05, 'epoch': 43.75}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "00e5336f4b5e48baaae672a2b7432aac",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.135416030883789, 'eval_runtime': 0.177, 'eval_samples_per_second': 45.188, 'eval_steps_per_second': 45.188, 'epoch': 43.75}\n",
"{'loss': 1.4021, 'learning_rate': 1.5318750000000003e-05, 'epoch': 46.88}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "898b59d5ffba4baf99d21e18b374f934",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.1519408226013184, 'eval_runtime': 0.176, 'eval_samples_per_second': 45.444, 'eval_steps_per_second': 45.444, 'epoch': 46.88}\n",
"{'loss': 1.3394, 'learning_rate': 1.5006250000000002e-05, 'epoch': 50.0}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "cd807bae9df64581a08bb1036080a52b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.166985273361206, 'eval_runtime': 0.178, 'eval_samples_per_second': 44.934, 'eval_steps_per_second': 44.934, 'epoch': 50.0}\n",
"{'loss': 1.2866, 'learning_rate': 1.4693750000000001e-05, 'epoch': 53.12}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2ba71ca51b744cbcba2e3a912784101e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.1920700073242188, 'eval_runtime': 0.1791, 'eval_samples_per_second': 44.677, 'eval_steps_per_second': 44.677, 'epoch': 53.12}\n",
"{'loss': 1.2681, 'learning_rate': 1.4382812500000001e-05, 'epoch': 56.25}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f6774b23e3fc455985f7725bab9cdf03",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.2044565677642822, 'eval_runtime': 0.164, 'eval_samples_per_second': 48.769, 'eval_steps_per_second': 48.769, 'epoch': 56.25}\n",
"{'loss': 1.1866, 'learning_rate': 1.40703125e-05, 'epoch': 59.38}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b558849c83e345f28880cd23f98bc1d5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.2193875312805176, 'eval_runtime': 0.174, 'eval_samples_per_second': 45.966, 'eval_steps_per_second': 45.966, 'epoch': 59.38}\n",
"{'loss': 1.2098, 'learning_rate': 1.3757812500000002e-05, 'epoch': 62.5}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "aeeb0feb24fb4844a31e5099747ba1c4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.2302308082580566, 'eval_runtime': 0.181, 'eval_samples_per_second': 44.189, 'eval_steps_per_second': 44.189, 'epoch': 62.5}\n",
"{'loss': 1.1386, 'learning_rate': 1.34453125e-05, 'epoch': 65.62}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f3fb41f76bf343a5a3e5451f70cca059",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.2400026321411133, 'eval_runtime': 0.173, 'eval_samples_per_second': 46.232, 'eval_steps_per_second': 46.232, 'epoch': 65.62}\n",
"{'loss': 1.0853, 'learning_rate': 1.31328125e-05, 'epoch': 68.75}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "15db91918cf44713841b5d34ed9f3757",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.2633779048919678, 'eval_runtime': 0.165, 'eval_samples_per_second': 48.474, 'eval_steps_per_second': 48.474, 'epoch': 68.75}\n",
"{'loss': 1.0888, 'learning_rate': 1.28203125e-05, 'epoch': 71.88}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "48d4ca7b20d34650ad18a278ba9f356b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.28098726272583, 'eval_runtime': 0.174, 'eval_samples_per_second': 45.967, 'eval_steps_per_second': 45.967, 'epoch': 71.88}\n",
"{'loss': 1.0408, 'learning_rate': 1.25078125e-05, 'epoch': 75.0}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6e661e3f089942949faf81b3ddbbdf0a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.2908809185028076, 'eval_runtime': 0.168, 'eval_samples_per_second': 47.608, 'eval_steps_per_second': 47.608, 'epoch': 75.0}\n",
"{'loss': 1.0309, 'learning_rate': 1.21953125e-05, 'epoch': 78.12}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2febad3cd49d408284ca28c921c403ac",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.3059136867523193, 'eval_runtime': 0.171, 'eval_samples_per_second': 46.774, 'eval_steps_per_second': 46.774, 'epoch': 78.12}\n",
"{'loss': 0.9523, 'learning_rate': 1.18828125e-05, 'epoch': 81.25}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5471b0fbf97b445ba178eae0676d1779",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.3248863220214844, 'eval_runtime': 0.168, 'eval_samples_per_second': 47.608, 'eval_steps_per_second': 47.608, 'epoch': 81.25}\n",
"{'loss': 0.9671, 'learning_rate': 1.1570312500000001e-05, 'epoch': 84.38}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "de65df02421140288275ae73f5e5376b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.333310127258301, 'eval_runtime': 0.165, 'eval_samples_per_second': 48.474, 'eval_steps_per_second': 48.474, 'epoch': 84.38}\n",
"{'loss': 0.9413, 'learning_rate': 1.12578125e-05, 'epoch': 87.5}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "16b796d25d484711bd2f579b0590342a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.354335308074951, 'eval_runtime': 0.17, 'eval_samples_per_second': 47.049, 'eval_steps_per_second': 47.049, 'epoch': 87.5}\n",
"{'loss': 0.9127, 'learning_rate': 1.0945312500000001e-05, 'epoch': 90.62}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b8450c8e6aef4d62abbec4b593b0b58e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.363644599914551, 'eval_runtime': 0.18, 'eval_samples_per_second': 44.434, 'eval_steps_per_second': 44.434, 'epoch': 90.62}\n",
"{'loss': 0.9095, 'learning_rate': 1.06328125e-05, 'epoch': 93.75}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a8041e19c3b347b9823e952156e54ace",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.367572784423828, 'eval_runtime': 0.164, 'eval_samples_per_second': 48.769, 'eval_steps_per_second': 48.769, 'epoch': 93.75}\n",
"{'loss': 0.8952, 'learning_rate': 1.0320312500000001e-05, 'epoch': 96.88}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8aa4df9d75ad4e58b572e8032f7dc16e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.3755526542663574, 'eval_runtime': 0.175, 'eval_samples_per_second': 45.704, 'eval_steps_per_second': 45.704, 'epoch': 96.88}\n",
"{'loss': 0.857, 'learning_rate': 1.00078125e-05, 'epoch': 100.0}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ec4972e5713c42419d1041569ca4cdce",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.3877663612365723, 'eval_runtime': 0.1654, 'eval_samples_per_second': 48.378, 'eval_steps_per_second': 48.378, 'epoch': 100.0}\n",
"{'loss': 0.8474, 'learning_rate': 9.695312500000001e-06, 'epoch': 103.12}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d51f83a7ad624a11ae4aa21878b3a13e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.414849281311035, 'eval_runtime': 0.177, 'eval_samples_per_second': 45.188, 'eval_steps_per_second': 45.188, 'epoch': 103.12}\n",
"{'loss': 0.8215, 'learning_rate': 9.3828125e-06, 'epoch': 106.25}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fc979efc0c564078b434a823478a1b8a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.423121929168701, 'eval_runtime': 0.179, 'eval_samples_per_second': 44.683, 'eval_steps_per_second': 44.683, 'epoch': 106.25}\n",
"{'loss': 0.8172, 'learning_rate': 9.070312500000001e-06, 'epoch': 109.38}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e4aeed9cdcbc4a2c94f472322aa160a8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.4243147373199463, 'eval_runtime': 0.171, 'eval_samples_per_second': 46.773, 'eval_steps_per_second': 46.773, 'epoch': 109.38}\n",
"{'loss': 0.7761, 'learning_rate': 8.7578125e-06, 'epoch': 112.5}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c3a393c296e24dc4a38f3b677ce2bb95",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.448868751525879, 'eval_runtime': 0.189, 'eval_samples_per_second': 42.319, 'eval_steps_per_second': 42.319, 'epoch': 112.5}\n",
"{'loss': 0.7737, 'learning_rate': 8.4453125e-06, 'epoch': 115.62}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "44a27cbb58524d3082103b5876b0827a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.4718222618103027, 'eval_runtime': 0.172, 'eval_samples_per_second': 46.501, 'eval_steps_per_second': 46.501, 'epoch': 115.62}\n",
"{'loss': 0.7476, 'learning_rate': 8.134375000000001e-06, 'epoch': 118.75}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9306979edf8b4e6e92474cf98918cdd8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.461360454559326, 'eval_runtime': 0.178, 'eval_samples_per_second': 44.934, 'eval_steps_per_second': 44.934, 'epoch': 118.75}\n",
"{'loss': 0.7345, 'learning_rate': 7.821875e-06, 'epoch': 121.88}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "070d29114e944fdaa75a356d836d36a5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.4704580307006836, 'eval_runtime': 0.2221, 'eval_samples_per_second': 36.028, 'eval_steps_per_second': 36.028, 'epoch': 121.88}\n",
"{'loss': 0.7426, 'learning_rate': 7.509375000000001e-06, 'epoch': 125.0}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "305847635a2241daa972500e5519bee1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.473985195159912, 'eval_runtime': 0.185, 'eval_samples_per_second': 43.233, 'eval_steps_per_second': 43.233, 'epoch': 125.0}\n",
"{'loss': 0.7151, 'learning_rate': 7.196875000000001e-06, 'epoch': 128.12}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "642c873e8590486ca47a1a1d1354d4ad",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.4833295345306396, 'eval_runtime': 0.214, 'eval_samples_per_second': 37.375, 'eval_steps_per_second': 37.375, 'epoch': 128.12}\n",
"{'loss': 0.7191, 'learning_rate': 6.884375000000001e-06, 'epoch': 131.25}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "795309ad803248728c805f60dcc989c5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.4785890579223633, 'eval_runtime': 0.185, 'eval_samples_per_second': 43.234, 'eval_steps_per_second': 43.234, 'epoch': 131.25}\n",
"{'loss': 0.6818, 'learning_rate': 6.5718750000000005e-06, 'epoch': 134.38}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "106c6b4e9014416ea5e5192bee946771",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.4881558418273926, 'eval_runtime': 0.176, 'eval_samples_per_second': 45.444, 'eval_steps_per_second': 45.444, 'epoch': 134.38}\n",
"{'loss': 0.6862, 'learning_rate': 6.2593750000000005e-06, 'epoch': 137.5}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5058241643f64d8da2796c495b140eb1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.4938278198242188, 'eval_runtime': 0.182, 'eval_samples_per_second': 43.946, 'eval_steps_per_second': 43.946, 'epoch': 137.5}\n",
"{'loss': 0.6929, 'learning_rate': 5.9468750000000006e-06, 'epoch': 140.62}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "aeac0f8fd31d4c05861f10594099ec3c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.497659683227539, 'eval_runtime': 0.183, 'eval_samples_per_second': 43.706, 'eval_steps_per_second': 43.706, 'epoch': 140.62}\n",
"{'loss': 0.6494, 'learning_rate': 5.634375000000001e-06, 'epoch': 143.75}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "547c03987efe4ebd8326d2784cfb0be4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.519473075866699, 'eval_runtime': 0.179, 'eval_samples_per_second': 44.683, 'eval_steps_per_second': 44.683, 'epoch': 143.75}\n",
"{'loss': 0.6689, 'learning_rate': 5.323437500000001e-06, 'epoch': 146.88}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9ad7adb0ccdc44659ac5e95b6131e64b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.518462657928467, 'eval_runtime': 0.19, 'eval_samples_per_second': 42.096, 'eval_steps_per_second': 42.096, 'epoch': 146.88}\n",
"{'loss': 0.6492, 'learning_rate': 5.010937500000001e-06, 'epoch': 150.0}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7ca7981c9d404487a928ee97fe3c4ff3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.5259156227111816, 'eval_runtime': 0.1726, 'eval_samples_per_second': 46.361, 'eval_steps_per_second': 46.361, 'epoch': 150.0}\n",
"{'loss': 0.6384, 'learning_rate': 4.698437500000001e-06, 'epoch': 153.12}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "428f0035e0cc4f148cfbf8de85d50740",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.5258779525756836, 'eval_runtime': 0.175, 'eval_samples_per_second': 45.704, 'eval_steps_per_second': 45.704, 'epoch': 153.12}\n",
"{'loss': 0.6435, 'learning_rate': 4.3875e-06, 'epoch': 156.25}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "457558de857c4c039153f6fbb62e9acf",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.5287444591522217, 'eval_runtime': 0.199, 'eval_samples_per_second': 40.192, 'eval_steps_per_second': 40.192, 'epoch': 156.25}\n",
"{'loss': 0.6251, 'learning_rate': 4.075e-06, 'epoch': 159.38}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6f2c0045401d47e3bb1aaf6c028fc61c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.5284457206726074, 'eval_runtime': 0.1878, 'eval_samples_per_second': 42.598, 'eval_steps_per_second': 42.598, 'epoch': 159.38}\n",
"{'loss': 0.6295, 'learning_rate': 3.7625e-06, 'epoch': 162.5}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "53e5625cdb484914a5a66af2567de8fd",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.539799213409424, 'eval_runtime': 0.1835, 'eval_samples_per_second': 43.585, 'eval_steps_per_second': 43.585, 'epoch': 162.5}\n",
"{'loss': 0.6324, 'learning_rate': 3.45e-06, 'epoch': 165.62}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7f9fb0004cdd4e97a47bc13ab443cf1b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.544182777404785, 'eval_runtime': 0.1705, 'eval_samples_per_second': 46.909, 'eval_steps_per_second': 46.909, 'epoch': 165.62}\n",
"{'loss': 0.6252, 'learning_rate': 3.1375e-06, 'epoch': 168.75}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "27e633aa0507449e8bc14b17970c30ef",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.5480570793151855, 'eval_runtime': 0.192, 'eval_samples_per_second': 41.657, 'eval_steps_per_second': 41.657, 'epoch': 168.75}\n",
"{'loss': 0.6108, 'learning_rate': 2.825e-06, 'epoch': 171.88}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "80dc6bab1c6f4262bc9bb356c75ac0e4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.5455217361450195, 'eval_runtime': 0.184, 'eval_samples_per_second': 43.469, 'eval_steps_per_second': 43.469, 'epoch': 171.88}\n",
"{'loss': 0.6034, 'learning_rate': 2.5125e-06, 'epoch': 175.0}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c041a3b97b844ac3b6e742397ec0c24a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.5502476692199707, 'eval_runtime': 0.175, 'eval_samples_per_second': 45.704, 'eval_steps_per_second': 45.704, 'epoch': 175.0}\n",
"{'loss': 0.5969, 'learning_rate': 2.2e-06, 'epoch': 178.12}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "496ac402e97a451fad9d3a5af6c140d9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.560110330581665, 'eval_runtime': 0.1855, 'eval_samples_per_second': 43.119, 'eval_steps_per_second': 43.119, 'epoch': 178.12}\n",
"{'loss': 0.5949, 'learning_rate': 1.8875000000000001e-06, 'epoch': 181.25}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1b180ba4412c4f708731df62d009554d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.561746835708618, 'eval_runtime': 0.1869, 'eval_samples_per_second': 42.799, 'eval_steps_per_second': 42.799, 'epoch': 181.25}\n",
"{'loss': 0.6183, 'learning_rate': 1.5750000000000002e-06, 'epoch': 184.38}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "61059355fde84d52bd9b3046e126e821",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.5679402351379395, 'eval_runtime': 0.182, 'eval_samples_per_second': 43.946, 'eval_steps_per_second': 43.946, 'epoch': 184.38}\n",
"{'loss': 0.5805, 'learning_rate': 1.2625000000000002e-06, 'epoch': 187.5}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "913150b439a343b0b75a5b1b79c4ef0f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.568711996078491, 'eval_runtime': 0.17, 'eval_samples_per_second': 47.048, 'eval_steps_per_second': 47.048, 'epoch': 187.5}\n",
"{'loss': 0.6032, 'learning_rate': 9.500000000000001e-07, 'epoch': 190.62}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "97a0dd51755942c4914784ab9e7c778f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.5707955360412598, 'eval_runtime': 0.169, 'eval_samples_per_second': 47.327, 'eval_steps_per_second': 47.327, 'epoch': 190.62}\n",
"{'loss': 0.5955, 'learning_rate': 6.375e-07, 'epoch': 193.75}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "516bb14a46de45f9a0889607dd3bf86d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.5708837509155273, 'eval_runtime': 0.175, 'eval_samples_per_second': 45.704, 'eval_steps_per_second': 45.704, 'epoch': 193.75}\n",
"{'loss': 0.5961, 'learning_rate': 3.25e-07, 'epoch': 196.88}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "11c15241f36b44c8bdf8426654a49334",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.5712759494781494, 'eval_runtime': 0.185, 'eval_samples_per_second': 43.233, 'eval_steps_per_second': 43.233, 'epoch': 196.88}\n",
"{'loss': 0.5914, 'learning_rate': 1.2500000000000001e-08, 'epoch': 200.0}\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "62521ef7f1c94705abf58d559a082eb1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.571309804916382, 'eval_runtime': 0.171, 'eval_samples_per_second': 46.773, 'eval_steps_per_second': 46.773, 'epoch': 200.0}\n",
"{'train_runtime': 1248.1602, 'train_samples_per_second': 10.255, 'train_steps_per_second': 10.255, 'train_loss': 1.073981523513794, 'epoch': 200.0}\n"
]
},
{
"data": {
"text/plain": [
"TrainOutput(global_step=12800, training_loss=1.073981523513794, metrics={'train_runtime': 1248.1602, 'train_samples_per_second': 10.255, 'train_steps_per_second': 10.255, 'train_loss': 1.073981523513794, 'epoch': 200.0})"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fine-tune the seq2seq model on the lesson/summary pairs.\n",
"# Evaluation runs every 200 steps and a checkpoint is written every 400 steps;\n",
"# save_steps has to stay a multiple of eval_steps for load_best_model_at_end.\n",
"training_args = Seq2SeqTrainingArguments(\n",
"    output_dir=\"files/lesson-summarization\",\n",
"    evaluation_strategy=\"steps\",\n",
"    eval_steps=200,\n",
"    learning_rate=2e-5,\n",
"    per_device_train_batch_size=1,\n",
"    per_device_eval_batch_size=1,\n",
"    weight_decay=0.01,\n",
"    save_total_limit=3,\n",
"    num_train_epochs=200,\n",
"    predict_with_generate=True,\n",
"    fp16=True,\n",
"    push_to_hub=True,\n",
"    save_steps=400,\n",
"    logging_steps=200,\n",
"    # The logged eval_loss keeps rising over the last epochs while the training\n",
"    # loss still falls, so restore the best checkpoint instead of the last one.\n",
"    load_best_model_at_end=True,\n",
"    metric_for_best_model=\"eval_loss\",\n",
"    greater_is_better=False,\n",
")\n",
"\n",
"trainer = Seq2SeqTrainer(\n",
"    model=model,\n",
"    args=training_args,\n",
"    train_dataset=train_dataset,\n",
"    eval_dataset=test_dataset,\n",
"    tokenizer=tokenizer,\n",
"    data_collator=data_collator,\n",
")\n",
"\n",
"trainer.train()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c6482574ad294bc3a748b86db8aba3ff",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Upload file pytorch_model.bin: 0%| | 1.00/231M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "789640dfa1324aeb8141a040ecf69eec",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Upload file runs/Aug16_07-35-41_LAPTOP-5QD1OBBG/events.out.tfevents.1692151545.LAPTOP-5QD1OBBG.25224.0: 0%| …"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"To https://huggingface.co/zuu/lesson-summarization\n",
" 79bde6c..63683e0 main -> main\n",
"\n",
"To https://huggingface.co/zuu/lesson-summarization\n",
" 63683e0..f9c9ac5 main -> main\n",
"\n"
]
}
],
"source": [
"# Write the trained model files to the local directory that the inference\n",
"# pipeline loads its weights from.\n",
"trainer.save_model(output_dir='./weights/lesson-summarization')"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Trained Model Loaded !!!\n"
]
}
],
"source": [
"# Build the summarization pipeline from the locally saved weights on device 0.\n",
"pipeline_lesson = pipeline(\"summarization\", model=\"./weights/lesson-summarization\", device=0)\n",
"\n",
"print(\"Trained Model Loaded !!!\")"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"def read_pdf_data(pdf_path, min_line_length=80):\n",
"    \"\"\"Extract the text of a PDF and group its long lines into paragraphs.\n",
"\n",
"    Every page is read with PyPDF2 and the combined text is split on newlines.\n",
"    Consecutive lines longer than ``min_line_length`` characters are joined into\n",
"    one paragraph; any shorter line closes the paragraph collected so far.\n",
"\n",
"    Args:\n",
"        pdf_path: Path of the PDF file to read.\n",
"        min_line_length: Lines with more characters than this count as paragraph\n",
"            body text. Defaults to 80.\n",
"\n",
"    Returns:\n",
"        A list of non-empty paragraph strings in document order.\n",
"    \"\"\"\n",
"    # NOTE(review): PdfFileReader/getNumPages/getPage/extractText are the legacy\n",
"    # PyPDF2 names (removed in PyPDF2 3.0) - pin PyPDF2<3 or migrate to PdfReader.\n",
"    # The context manager closes the file even if PyPDF2 raises while parsing.\n",
"    with open(pdf_path, 'rb') as pdf_file:\n",
"        pdf_reader = PyPDF2.PdfFileReader(pdf_file)\n",
"        whole_text = ''.join(\n",
"            f\" {pdf_reader.getPage(page).extractText()}\"\n",
"            for page in range(pdf_reader.getNumPages())\n",
"        )\n",
"\n",
"    paragraphs = []\n",
"    paragraph = ''\n",
"    for line in whole_text.split('\\n'):\n",
"        if len(line) > min_line_length:\n",
"            paragraph += f\" {line}\"\n",
"            continue\n",
"        # A short line ends the current paragraph. The short line itself is not kept.\n",
"        # NOTE(review): confirm that dropping the short line is intended.\n",
"        if paragraph:\n",
"            paragraphs.append(paragraph)\n",
"        paragraph = ''\n",
"\n",
"    # Flush the final paragraph when the text ends on a long line.\n",
"    if paragraph:\n",
"        paragraphs.append(paragraph)\n",
"    return paragraphs\n",
"\n",
"def inference_lesson_summarizer(pdf_path):\n",
"    \"\"\"Summarize every paragraph of a PDF with the fine-tuned pipeline.\n",
"\n",
"    Args:\n",
"        pdf_path: Path of the PDF file to summarize.\n",
"\n",
"    Returns:\n",
"        One string holding the paragraph summaries in document order, each\n",
"        followed by a blank line.\n",
"    \"\"\"\n",
"    summaries = []\n",
"    for paragraph in read_pdf_data(pdf_path):\n",
"        # Blank paragraphs carry no text to summarize, so skip the model call.\n",
"        if not paragraph.strip():\n",
"            continue\n",
"        summaries.append(pipeline_lesson(paragraph)[0]['summary_text'])\n",
"\n",
"    return ''.join(f\"{summary}\\n\\n\" for summary in summaries)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Your max_length is set to 200, but your input_length is only 110. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=55)\n",
"Your max_length is set to 200, but your input_length is only 130. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=65)\n",
"Your max_length is set to 200, but your input_length is only 173. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=86)\n",
"Your max_length is set to 200, but your input_length is only 81. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=40)\n",
"Your max_length is set to 200, but your input_length is only 128. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=64)\n",
"Your max_length is set to 200, but your input_length is only 137. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=68)\n",
"Your max_length is set to 200, but your input_length is only 99. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=49)\n",
"Your max_length is set to 200, but your input_length is only 139. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=69)\n"
]
}
],
"source": [
"summarized_text = inference_lesson_summarizer('data/TextsummarizationData/Data/PDF1_Data.pdf')"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Computer memory is represented as a sequence of bytes, grouped into words. Bytes have unique addresses or indices. The size of a word and byte determines the amount of memory that can be accessed. In C, a pointer is a variable that stores the memory address of another variable, which can also be a pointeder.\n",
"\n",
"The value pointed to by a pointer can be retrieved using the unary * operator, for example: int *p, and int x = *p;. The memory address of a variable can be obtained using the Unary ampersand (& ) operator, such as int*p = &x; to retrieve the value points to. In C, pointers use consecutive memory addresses without being able to identify them in the code\n",
"\n",
"Pointer arithmetic can be used to adjust w here a pointer pointer. For example, if pc points to the first element of an array, after executing pC+=3; then c points towards the fourth element. A pointer can even be dereferenced using array notation. For an array c, for example, the value of c[2] represents the value that the array el ement represents which is two elements beyond the array element currently pointed to\n",
"\n",
"In the second lecture, we defined functions that took an array as an argument. For instance, void reverse(char s[] affects the values of the array. It affects both the local value and the array itself.\n",
"\n",
"C allows the creation of arrays of pointers, such as int *a[5]. These arrays are particularly useful in strings. For instance, in C's support for command line arguments, main(int argc, char *argv[]), argv is an array of character pointers representing the command line argument.\n",
"\n",
"In C, to define an instance of a structure called circle, we write struct circle c;. Structures can also be initialized with values using the syntax 'struct circles c = 12, 23, 5;'.An automatic or local structure variable can be initialised by a function call. For instance, structure ci rcle c = circle_init(); initializes the structure variable c with values specified by the C function.A structure can be declared and int.Int y.\n",
"\n",
"In C, programmers can use pointers to functions, enabling functions to be passed as arguments to other functions. This allows for increased flexibility and parameterization of algorithms. For instance, a sorting algorithm can be designed to accept a pointer to a comparison function.\n",
"\n",
"A structure membe r can be accessed using the notation '. notation: 'structname.member; for example: pt.x', while comparisons between structures (pt1 > Pt2) are not defined, meaning points to structures can be defined using the 'structure circle *pc' operator, but it can look cumbersome, such as (*pc).x).\n",
"\n",
"\n"
]
}
],
"source": [
"print(summarized_text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "torch113",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment