"encoded_inputs = [tokenizer(word, return_tensors='tf') for word in words]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "de2ce194-c5b6-48c6-bab9-6687abd51077",
"metadata": {},
"outputs": [],
"source": [
"encoded_inputs[0]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d2d4363e-eec8-4559-8ea1-850ce2319b2c",
"metadata": {},
"outputs": [],
"source": [
"# Run each tokenized word through the model and collect one NumPy array per word.\n",
"# NOTE(review): every array keeps the shape the model produced for that word, so shapes may\n",
"# differ between words -- confirm downstream cells expect that.\n",
"embeddings = []\n",
"with tf.device('/GPU:0'):  # ask TensorFlow to place the forward passes on device '/GPU:0'\n",
"    for encoded_input in encoded_inputs:\n",
"        outputs = model(encoded_input)\n",
"        # Index 0 picks the first model output, the second 0 its first entry\n",
"        # (presumably the single batch item -- TODO confirm).\n",
"        first_output = outputs[0]\n",
"        embedding = first_output[0].numpy()\n",
"        embeddings.append(embedding)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "dff2f4e7-a31b-439c-93b4-a9b75819d56a",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.7/site-packages/pandas/core/internals/construction.py:540: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
" values = np.array([convert(v) for v in values])\n"