"cases_simple = df['case in simple word'].tolist()\n",
"cases_simple = [i.replace('\\n', '').strip() for i in cases_simple]\n",
"\n",
"ViolateFlag = df['ViolateFlag'].tolist()\n",
"class_dict = {'Yes': 1, 'No': 0}\n",
"ViolateFlag = [i.replace('\\n', '').strip() for i in ViolateFlag]\n",
"ViolateFlags = [class_dict[i] for i in ViolateFlag]"
]
},
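{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `stderr` log in the next cell shows `distilbert-base-uncased` being loaded as a `DistilBertForSequenceClassification` with a freshly initialized classification head, and a `predict` helper is invoked further down. The cell below is a minimal sketch of what that loading step and helper might look like, assuming binary labels (`num_labels=2`) to match the Yes/No flags above; the body of `predict` is a hypothetical reconstruction, not the notebook's original code."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal sketch only: the model/tokenizer names match the logs below,\n",
"# but num_labels and the predict() body are assumptions.\n",
"import torch\n",
"from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')\n",
"model = AutoModelForSequenceClassification.from_pretrained(\n",
"    'distilbert-base-uncased', num_labels=2)  # 1 = 'Yes' (violation), 0 = 'No'\n",
"\n",
"def predict(text):\n",
"    # Hypothetical helper matching the predict(...) call later in this notebook.\n",
"    inputs = tokenizer(text, truncation=True, max_length=512, return_tensors='pt')\n",
"    with torch.no_grad():\n",
"        logits = model(**inputs).logits\n",
"    return int(logits.argmax(dim=-1))  # 1 means a predicted violation"
]
},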
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"loading file vocab.txt from cache at C:\\Users\\Legion/.cache\\huggingface\\hub\\models--distilbert-base-uncased\\snapshots\\1c4513b2eedbda136f57676a34eea67aba266e5c\\vocab.txt\n",
"loading file tokenizer.json from cache at C:\\Users\\Legion/.cache\\huggingface\\hub\\models--distilbert-base-uncased\\snapshots\\1c4513b2eedbda136f57676a34eea67aba266e5c\\tokenizer.json\n",
"loading file added_tokens.json from cache at None\n",
"loading file special_tokens_map.json from cache at None\n",
"loading file tokenizer_config.json from cache at C:\\Users\\Legion/.cache\\huggingface\\hub\\models--distilbert-base-uncased\\snapshots\\1c4513b2eedbda136f57676a34eea67aba266e5c\\tokenizer_config.json\n",
"loading configuration file config.json from cache at C:\\Users\\Legion/.cache\\huggingface\\hub\\models--distilbert-base-uncased\\snapshots\\1c4513b2eedbda136f57676a34eea67aba266e5c\\config.json\n",
"loading configuration file config.json from cache at C:\\Users\\Legion/.cache\\huggingface\\hub\\models--distilbert-base-uncased\\snapshots\\1c4513b2eedbda136f57676a34eea67aba266e5c\\config.json\n",
"Model config DistilBertConfig {\n",
" \"activation\": \"gelu\",\n",
" \"architectures\": [\n",
" \"DistilBertForMaskedLM\"\n",
" ],\n",
" \"attention_dropout\": 0.1,\n",
" \"dim\": 768,\n",
" \"dropout\": 0.1,\n",
" \"hidden_dim\": 3072,\n",
" \"initializer_range\": 0.02,\n",
" \"max_position_embeddings\": 512,\n",
" \"model_type\": \"distilbert\",\n",
" \"n_heads\": 12,\n",
" \"n_layers\": 6,\n",
" \"pad_token_id\": 0,\n",
" \"qa_dropout\": 0.1,\n",
" \"seq_classif_dropout\": 0.2,\n",
" \"sinusoidal_pos_embds\": false,\n",
" \"tie_weights_\": true,\n",
" \"transformers_version\": \"4.24.0\",\n",
" \"vocab_size\": 30522\n",
"}\n",
"\n",
"loading weights file pytorch_model.bin from cache at C:\\Users\\Legion/.cache\\huggingface\\hub\\models--distilbert-base-uncased\\snapshots\\1c4513b2eedbda136f57676a34eea67aba266e5c\\pytorch_model.bin\n",
"Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_transform.bias', 'vocab_transform.weight']\n",
"- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
"The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n",
"c:\\Users\\Legion\\.conda\\envs\\torch111\\lib\\site-packages\\transformers\\optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"predict(\"I am a journalist and union official at 'Associated Newspapers of Ceylon Ltd.' My transfer, ordered by Rohana Ariyarathna, is illegal, arbitrary, and a violation of my constitutional rights. The Rohana Ariyarathna does not have the authority to transfer union officials, and I am seeking to set aside the transfer letter.I request you to perform the ceremony for me.\")"