Commit 39dd25ff authored by Lelkada L L P S M's avatar Lelkada L L P S M

path changes

parent aa89276d
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "693e7624-72ba-4d7a-b00a-0df2e892302c",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Build the table of (lead, follow) word pairs used by the cells below.\n",
"# NOTE(review): `words` is not defined anywhere in this notebook. It is presumably a\n",
"# list of str tokens (it is sliced, appended to, and its items are indexed later);\n",
"# it must be created before this cell or Restart & Run All fails.\n",
"# Column names use plain ASCII quotes: the typographic quotes that were here made the\n",
"# cell fail with a SyntaxError.\n",
"dict_df = pd.DataFrame(columns=['lead', 'follow', 'freq'])\n",
"dict_df['lead'] = words\n",
"\n",
"# Each word is paired with its successor; the last word gets the 'EndWord' sentinel.\n",
"follow = words[1:]\n",
"follow.append('EndWord')\n",
"# Store the successors. Without this assignment the 'follow' column stays empty and\n",
"# the later groupby / pivot cells have nothing to count.\n",
"dict_df['follow'] = follow"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b6f91348-69f8-494e-abb4-6efbd12fdd03",
"metadata": {},
"outputs": [],
"source": [
"# Tokens that can close a sentence: they end in '.', '!' or '?' but are not a bare '.'.\n",
"# str.endswith is used instead of word[-1] so that an empty token is simply skipped\n",
"# rather than raising IndexError.\n",
"end_words = [\n",
"    word for word in words\n",
"    if word != '.' and word.endswith(('.', '!', '?'))\n",
"]\n",
"print(end_words)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "35b335d4-638b-458b-9ea7-b93d36cd2f02",
"metadata": {},
"outputs": [],
"source": [
"# Number of occurrences of each (lead, follow) pair, written onto every row of that pair.\n",
"# Only one column is counted per group: selecting columns with a bare tuple\n",
"# (['lead','follow']) is rejected by current pandas, and the two-column result it used\n",
"# to produce cannot be assigned to the single 'freq' column.\n",
"dict_df['freq'] = dict_df.groupby(['lead', 'follow'])['lead'].transform('count')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0ba7366c-384e-495a-8665-46544ce04413",
"metadata": {},
"outputs": [],
"source": [
"# Collapse repeated rows, then reshape into a matrix with one row per 'lead' value,\n",
"# one column per 'follow' value and the 'freq' values as cell contents.\n",
"dict_df = dict_df.drop_duplicates()\n",
"pivot_df = dict_df.pivot(\n",
"    index='lead',\n",
"    columns='follow',\n",
"    values='freq',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb193218-4325-452e-adb8-64c2ffc89ff3",
"metadata": {},
"outputs": [],
"source": [
"# Convert each row to proportions: every entry is divided by the total of its own row.\n",
"sum_words = pivot_df.sum(axis=1)\n",
"pivot_df = pivot_df.div(sum_words, axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1871637-a4cf-49e6-ae20-5cfb52bf2347",
"metadata": {},
"outputs": [],
"source": [
"from numpy.random import choice\n",
"\n",
"\n",
"def make_a_sentence(start, max_words=30):\n",
"    \"\"\"Build a sentence by repeatedly sampling the next word from `pivot_df`.\n",
"\n",
"    Reads the notebook globals `pivot_df` (rows: current word, columns: candidate\n",
"    next words, values: sampling weights) and `end_words` (words allowed to close\n",
"    the sentence).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    start : str\n",
"        First word of the sentence; must be a row label of `pivot_df`.\n",
"    max_words : int, default 30\n",
"        Upper bound on the number of words in the sentence.\n",
"\n",
"    Returns\n",
"    -------\n",
"    str\n",
"        The sampled words joined by single spaces.\n",
"\n",
"    Raises\n",
"    ------\n",
"    KeyError\n",
"        If `start` is not a row label of `pivot_df`.\n",
"    \"\"\"\n",
"    if start not in pivot_df.index:\n",
"        raise KeyError(f'{start!r} is not a lead word in pivot_df')\n",
"\n",
"    word = start\n",
"    sentence = [word]\n",
"    while len(sentence) < max_words:\n",
"        weights = pivot_df.loc[word].fillna(0)\n",
"\n",
"        # 'EndWord' is only a sentinel and is never emitted. An end word is accepted\n",
"        # only once the sentence already holds more than two words. Disallowed\n",
"        # candidates are masked out and the weights renormalised, which yields the\n",
"        # same distribution as drawing again after a rejected pick but cannot spin\n",
"        # forever when every candidate is disallowed.\n",
"        blocked = weights.index == 'EndWord'\n",
"        if len(sentence) <= 2:\n",
"            blocked = blocked | weights.index.isin(end_words)\n",
"        weights = weights[~blocked]\n",
"\n",
"        total = weights.sum()\n",
"        if total == 0:\n",
"            # No admissible continuation from this word: stop with what we have.\n",
"            break\n",
"\n",
"        next_word = choice(\n",
"            a=list(weights.index),\n",
"            p=(weights / total).to_numpy(dtype=float),\n",
"        )\n",
"        sentence.append(next_word)\n",
"        if next_word in end_words:\n",
"            break\n",
"        word = next_word\n",
"    return ' '.join(sentence)\n",
"\n",
"\n",
"sentence = make_a_sentence('Donald')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e3004fbb-2f65-4407-ab93-56b479a10e81",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "default:Python",
"language": "python",
"name": "conda-env-default-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment