{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"cell_id": "af6850ba-bf23-4bbd-b58c-fea6b29e9500",
"deepnote_cell_type": "markdown",
"tags": []
},
"source": [
"# Review\n",
"\n",
"Today's class was review.\n",
"\n",
"[Recording of lecture from 1/28/2022](https://uci.zoom.us/rec/share/OxfhTaj6tVZ4ysRJC_U2Nj3vGo0Hiqn-vm8WgGK-ANcfV5hahZLYPRCQFj60xI2a.MTVnvxxTkIXzkvLS?startTime=1643385481000)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"cell_id": "bf0ed2ee-ca49-4236-976e-ddd5a4c7978d",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 300,
"execution_start": 1643385441255,
"source_hash": "7baafd37",
"tags": []
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import altair as alt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dictionaries, and their relationship to pandas Series and pandas DataFrames."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"cell_id": "e17de02e-bfb1-44b7-8007-98ecad8f04c7",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
21.1875
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 2,
"execution_start": 1643385790842,
"source_hash": "6a294ecd",
"tags": []
},
"outputs": [],
"source": [
"d = {\"a\":5,\"b\":10,\"chris\":30}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cell_id": "d9c35a6f-e750-48e9-9ad6-b181ca907ae7",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
1
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 345,
"execution_start": 1643385807114,
"source_hash": "15f90960",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"10"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d[\"b\"]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"cell_id": "75287e85-6ff8-4d16-9a24-178df9b8875c",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_start": 1643385840275,
"source_hash": "62db6b48",
"tags": []
},
"outputs": [],
"source": [
"e = {\"a\":[5,6,7],\"b\":10,\"chris\":[30,2.4,-20]}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Making a DataFrame from a dictionary."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"cell_id": "ccb3054b-9dce-4cc3-9093-64fcac225051",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 2,
"execution_start": 1643385925252,
"source_hash": "e08c1f7f",
"tags": []
},
"outputs": [],
"source": [
"df = pd.DataFrame(e)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"cell_id": "cdc110ed-cbdd-4afc-ba91-0bc9b5420537",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 304,
"execution_start": 1643385956633,
"source_hash": "f804c160",
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" chris | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5 | \n",
" 10 | \n",
" 30.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 6 | \n",
" 10 | \n",
" 2.4 | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" 10 | \n",
" -20.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b chris\n",
"0 5 10 30.0\n",
"1 6 10 2.4\n",
"2 7 10 -20.0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"cell_id": "b7347bc9-62cb-49f2-abc7-f176f0f8ff96",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
1
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 5,
"execution_start": 1643385932131,
"source_hash": "b165b385",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"0 5\n",
"1 6\n",
"2 7\n",
"Name: a, dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.a"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"cell_id": "540229e4-e89b-448f-8305-dd73476a3e2d",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
1
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 4,
"execution_start": 1643385940313,
"source_hash": "38ded7f3",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"0 5\n",
"1 6\n",
"2 7\n",
"Name: a, dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"a\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Making a Series from a dictionary."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"cell_id": "48e7e248-31bd-46f2-92b7-49aad02a2d0e",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
1
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 18,
"execution_start": 1643385881115,
"source_hash": "ef1cac7a",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"a 5\n",
"b 10\n",
"chris 30\n",
"dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.Series(d)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cell_id": "3d472dc7-3ab9-4753-b973-bb384589c577",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 21,
"execution_start": 1643385993312,
"source_hash": "f804c160",
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.deepnote.dataframe.v3+json": {
"column_count": 3,
"columns": [
{
"dtype": "int64",
"name": "a",
"stats": {
"histogram": [
{
"bin_end": 5.2,
"bin_start": 5,
"count": 1
},
{
"bin_end": 5.4,
"bin_start": 5.2,
"count": 0
},
{
"bin_end": 5.6,
"bin_start": 5.4,
"count": 0
},
{
"bin_end": 5.8,
"bin_start": 5.6,
"count": 0
},
{
"bin_end": 6,
"bin_start": 5.8,
"count": 0
},
{
"bin_end": 6.2,
"bin_start": 6,
"count": 1
},
{
"bin_end": 6.4,
"bin_start": 6.2,
"count": 0
},
{
"bin_end": 6.6,
"bin_start": 6.4,
"count": 0
},
{
"bin_end": 6.8,
"bin_start": 6.6,
"count": 0
},
{
"bin_end": 7,
"bin_start": 6.8,
"count": 1
}
],
"max": "7",
"min": "5",
"nan_count": 0,
"unique_count": 3
}
},
{
"dtype": "int64",
"name": "b",
"stats": {
"histogram": [
{
"bin_end": 9.6,
"bin_start": 9.5,
"count": 0
},
{
"bin_end": 9.7,
"bin_start": 9.6,
"count": 0
},
{
"bin_end": 9.8,
"bin_start": 9.7,
"count": 0
},
{
"bin_end": 9.9,
"bin_start": 9.8,
"count": 0
},
{
"bin_end": 10,
"bin_start": 9.9,
"count": 0
},
{
"bin_end": 10.1,
"bin_start": 10,
"count": 3
},
{
"bin_end": 10.2,
"bin_start": 10.1,
"count": 0
},
{
"bin_end": 10.3,
"bin_start": 10.2,
"count": 0
},
{
"bin_end": 10.4,
"bin_start": 10.3,
"count": 0
},
{
"bin_end": 10.5,
"bin_start": 10.4,
"count": 0
}
],
"max": "10",
"min": "10",
"nan_count": 0,
"unique_count": 1
}
},
{
"dtype": "float64",
"name": "chris",
"stats": {
"histogram": [
{
"bin_end": -15,
"bin_start": -20,
"count": 1
},
{
"bin_end": -10,
"bin_start": -15,
"count": 0
},
{
"bin_end": -5,
"bin_start": -10,
"count": 0
},
{
"bin_end": 0,
"bin_start": -5,
"count": 0
},
{
"bin_end": 5,
"bin_start": 0,
"count": 1
},
{
"bin_end": 10,
"bin_start": 5,
"count": 0
},
{
"bin_end": 15,
"bin_start": 10,
"count": 0
},
{
"bin_end": 20,
"bin_start": 15,
"count": 0
},
{
"bin_end": 25,
"bin_start": 20,
"count": 0
},
{
"bin_end": 30,
"bin_start": 25,
"count": 1
}
],
"max": "30.0",
"min": "-20.0",
"nan_count": 0,
"unique_count": 3
}
},
{
"dtype": "int64",
"name": "_deepnote_index_column"
}
],
"row_count": 3,
"rows": [
{
"_deepnote_index_column": 0,
"a": 5,
"b": 10,
"chris": 30
},
{
"_deepnote_index_column": 1,
"a": 6,
"b": 10,
"chris": 2.4
},
{
"_deepnote_index_column": 2,
"a": 7,
"b": 10,
"chris": -20
}
]
},
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" chris | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5 | \n",
" 10 | \n",
" 30.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 6 | \n",
" 10 | \n",
" 2.4 | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" 10 | \n",
" -20.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b chris\n",
"0 5 10 30.0\n",
"1 6 10 2.4\n",
"2 7 10 -20.0"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cell_id": "6c425055-3982-4910-99f4-f64724a14206",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
1
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 4,
"execution_start": 1643385993338,
"source_hash": "25d43fa0",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['a', 'b', 'chris'], dtype='object')"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## List comprehension"
]
},
{
"cell_type": "markdown",
"metadata": {
"cell_id": "b2c78b4e-4d99-4824-99e5-6a2d2a83acc5",
"deepnote_cell_type": "markdown",
"tags": []
},
"source": [
"Practice exercise:\n",
"* Make a list of `True`s (bool type) using list comprehension, where the length of the list is the number of rows in `df`."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"cell_id": "07ce0717-e02a-4a20-8561-3e4a974f9234",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
1
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 6,
"execution_start": 1643386278671,
"source_hash": "5d8ad24d",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"['true', 'true', 'true']"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# These are strings, not bools\n",
"['true' for x in range(3)]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"cell_id": "0cfaacfb-6711-491b-91d4-5f66b8481c34",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
1
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 5,
"execution_start": 1643386256002,
"source_hash": "7cf516b",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"['true', 'true', 'true']"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[\"true\" for x in range(len(df))]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"cell_id": "ae2ab7a4-28b6-4f71-b4e1-be49e864a051",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 17,
"execution_start": 1643386406029,
"source_hash": "ac64be90",
"tags": []
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'true' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/var/folders/8j/gshrlmtn7dg4qtztj4d4t_w40000gn/T/ipykernel_37915/3159580370.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;34m[\u001b[0m\u001b[0mtrue\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/var/folders/8j/gshrlmtn7dg4qtztj4d4t_w40000gn/T/ipykernel_37915/3159580370.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;34m[\u001b[0m\u001b[0mtrue\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'true' is not defined"
]
}
],
"source": [
"[true for x in range(len(df))]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here is the correct answer."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"cell_id": "72829fa8-e4cb-4638-af02-a19a67ca9bd2",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
1
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 5,
"execution_start": 1643386417846,
"source_hash": "4a58405d",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[True, True, True]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[True for x in range(len(df))]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Putting a new column in a DataFrame\n",
"\n",
"One way to create a new column."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"cell_id": "9d4cf038-cd0e-4c04-a5d6-f9fe714697e9",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 1,
"execution_start": 1643386477651,
"source_hash": "96acba45",
"tags": []
},
"outputs": [],
"source": [
"df[\"new column\"] = [True for x in range(len(df))]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"cell_id": "8a603460-c5fd-4fee-a76b-020128bbcf98",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
118.953125
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 21,
"execution_start": 1643386499453,
"source_hash": "f804c160",
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" chris | \n",
" new column | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5 | \n",
" 10 | \n",
" 30.0 | \n",
" True | \n",
"
\n",
" \n",
" 1 | \n",
" 6 | \n",
" 10 | \n",
" 2.4 | \n",
" True | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" 10 | \n",
" -20.0 | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b chris new column\n",
"0 5 10 30.0 True\n",
"1 6 10 2.4 True\n",
"2 7 10 -20.0 True"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If the new column is filled with a single value, then you can make the new column faster:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cell_id": "162ef89e-ed72-477c-94cf-5ceb6fc8af3e",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 4,
"execution_start": 1643386548048,
"source_hash": "8e76f58a",
"tags": []
},
"outputs": [],
"source": [
"df[\"new column 2\"] = False"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cell_id": "7858a04b-ade8-436f-909b-70260f367ba4",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
118.953125
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 31,
"execution_start": 1643386552003,
"source_hash": "f804c160",
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.deepnote.dataframe.v3+json": {
"column_count": 5,
"columns": [
{
"dtype": "int64",
"name": "a",
"stats": {
"histogram": [
{
"bin_end": 5.2,
"bin_start": 5,
"count": 1
},
{
"bin_end": 5.4,
"bin_start": 5.2,
"count": 0
},
{
"bin_end": 5.6,
"bin_start": 5.4,
"count": 0
},
{
"bin_end": 5.8,
"bin_start": 5.6,
"count": 0
},
{
"bin_end": 6,
"bin_start": 5.8,
"count": 0
},
{
"bin_end": 6.2,
"bin_start": 6,
"count": 1
},
{
"bin_end": 6.4,
"bin_start": 6.2,
"count": 0
},
{
"bin_end": 6.6,
"bin_start": 6.4,
"count": 0
},
{
"bin_end": 6.8,
"bin_start": 6.6,
"count": 0
},
{
"bin_end": 7,
"bin_start": 6.8,
"count": 1
}
],
"max": "7",
"min": "5",
"nan_count": 0,
"unique_count": 3
}
},
{
"dtype": "int64",
"name": "b",
"stats": {
"histogram": [
{
"bin_end": 9.6,
"bin_start": 9.5,
"count": 0
},
{
"bin_end": 9.7,
"bin_start": 9.6,
"count": 0
},
{
"bin_end": 9.8,
"bin_start": 9.7,
"count": 0
},
{
"bin_end": 9.9,
"bin_start": 9.8,
"count": 0
},
{
"bin_end": 10,
"bin_start": 9.9,
"count": 0
},
{
"bin_end": 10.1,
"bin_start": 10,
"count": 3
},
{
"bin_end": 10.2,
"bin_start": 10.1,
"count": 0
},
{
"bin_end": 10.3,
"bin_start": 10.2,
"count": 0
},
{
"bin_end": 10.4,
"bin_start": 10.3,
"count": 0
},
{
"bin_end": 10.5,
"bin_start": 10.4,
"count": 0
}
],
"max": "10",
"min": "10",
"nan_count": 0,
"unique_count": 1
}
},
{
"dtype": "float64",
"name": "chris",
"stats": {
"histogram": [
{
"bin_end": -15,
"bin_start": -20,
"count": 1
},
{
"bin_end": -10,
"bin_start": -15,
"count": 0
},
{
"bin_end": -5,
"bin_start": -10,
"count": 0
},
{
"bin_end": 0,
"bin_start": -5,
"count": 0
},
{
"bin_end": 5,
"bin_start": 0,
"count": 1
},
{
"bin_end": 10,
"bin_start": 5,
"count": 0
},
{
"bin_end": 15,
"bin_start": 10,
"count": 0
},
{
"bin_end": 20,
"bin_start": 15,
"count": 0
},
{
"bin_end": 25,
"bin_start": 20,
"count": 0
},
{
"bin_end": 30,
"bin_start": 25,
"count": 1
}
],
"max": "30.0",
"min": "-20.0",
"nan_count": 0,
"unique_count": 3
}
},
{
"dtype": "bool",
"name": "new column",
"stats": {
"categories": [
{
"count": 3,
"name": "True"
}
],
"nan_count": 0,
"unique_count": 1
}
},
{
"dtype": "bool",
"name": "new column 2",
"stats": {
"categories": [
{
"count": 3,
"name": "False"
}
],
"nan_count": 0,
"unique_count": 1
}
},
{
"dtype": "int64",
"name": "_deepnote_index_column"
}
],
"row_count": 3,
"rows": [
{
"_deepnote_index_column": 0,
"a": 5,
"b": 10,
"chris": 30,
"new column": true,
"new column 2": false
},
{
"_deepnote_index_column": 1,
"a": 6,
"b": 10,
"chris": 2.4,
"new column": true,
"new column 2": false
},
{
"_deepnote_index_column": 2,
"a": 7,
"b": 10,
"chris": -20,
"new column": true,
"new column 2": false
}
]
},
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" chris | \n",
" new column | \n",
" new column 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5 | \n",
" 10 | \n",
" 30.0 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" 6 | \n",
" 10 | \n",
" 2.4 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" 10 | \n",
" -20.0 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b chris new column new column 2\n",
"0 5 10 30.0 True False\n",
"1 6 10 2.4 True False\n",
"2 7 10 -20.0 True False"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Indexing\n",
"\n",
"Indexing using `iloc`."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"cell_id": "ba6032d3-f47b-411d-a47f-e3501c9cd4a3",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 8,
"execution_start": 1643386757914,
"source_hash": "549c0088",
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" chris | \n",
" new column | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5 | \n",
" 10 | \n",
" 30.0 | \n",
" True | \n",
"
\n",
" \n",
" 1 | \n",
" 20 | \n",
" 10 | \n",
" 2.4 | \n",
" True | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" 10 | \n",
" -20.0 | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b chris new column\n",
"0 5 10 30.0 True\n",
"1 20 10 2.4 True\n",
"2 7 10 -20.0 True"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[1,0] = 20\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Indexing using `loc`."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"cell_id": "64d8f5af-84aa-4706-9e76-2858824728f8",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 50,
"execution_start": 1643386786790,
"source_hash": "82352452",
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" chris | \n",
" new column | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5 | \n",
" 10 | \n",
" 30.0 | \n",
" True | \n",
"
\n",
" \n",
" 1 | \n",
" 20 | \n",
" 10 | \n",
" 2.4 | \n",
" True | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" 10 | \n",
" -20.0 | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b chris new column\n",
"0 5 10 30.0 True\n",
"1 20 10 2.4 True\n",
"2 7 10 -20.0 True"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[1,'a'] = 20\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {
"cell_id": "a8a1ef54-7fe9-4dd3-8cb9-bee16851af20",
"deepnote_cell_type": "markdown",
"tags": []
},
"source": [
"## Second largest value in a column.\n",
"\n",
"Find the second largest value in the \"a\" column of df."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cell_id": "f331642a-cea2-457a-96de-76dad08ce281",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
78.8125
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 10,
"execution_start": 1643386855790,
"source_hash": "38ded7f3",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"0 5\n",
"1 20\n",
"2 7\n",
"Name: a, dtype: int64"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"a\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cell_id": "c5022c3e-d91b-4de6-ac34-06553827fe2b",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
78.8125
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 2,
"execution_start": 1643386948119,
"source_hash": "e21b8e18",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"0 5\n",
"2 7\n",
"1 20\n",
"Name: a, dtype: int64"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"a\"].sort_values()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cell_id": "cfcca3cc-ba57-4fc8-a39e-50dedf4e53de",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
78.8125
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 6,
"execution_start": 1643386992997,
"source_hash": "765ab8e8",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"1 20\n",
"2 7\n",
"0 5\n",
"Name: a, dtype: int64"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"a\"].sort_values(ascending=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I kept accidentally trying to use `[1]` instead of `iloc[1]`. Here is the correct way to find the element at index 1 in a pandas Series."
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"cell_id": "2887e8ce-6d9c-4744-9d6c-2f3f073ff020",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
21.1875
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 4,
"execution_start": 1643387027488,
"source_hash": "f57a6f13",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"7"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"a\"].sort_values(ascending=False).iloc[1]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Without the `ascending` keyword argument, we have to take the second-to-last entry."
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"cell_id": "180f3cb3-29ba-425e-82d5-aa080e61d658",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
21.1875
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 8,
"execution_start": 1643387069710,
"source_hash": "e9017366",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"7"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"a\"].sort_values().iloc[-2]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Practice with axis\n",
"\n",
"We haven't used `median` before, I don't think, but the following should make sense. The most important part is recognizing what `axis=0` means."
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" chris | \n",
" new column | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5 | \n",
" 10 | \n",
" 30.0 | \n",
" True | \n",
"
\n",
" \n",
" 1 | \n",
" 20 | \n",
" 10 | \n",
" 2.4 | \n",
" True | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" 10 | \n",
" -20.0 | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b chris new column\n",
"0 5 10 30.0 True\n",
"1 20 10 2.4 True\n",
"2 7 10 -20.0 True"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"cell_id": "320bf021-7f54-4bd6-b9f3-1d079c085c21",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
1
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 5,
"execution_start": 1643387140975,
"source_hash": "8a04b452",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"a 7.0\n",
"b 10.0\n",
"chris 2.4\n",
"new column 1.0\n",
"dtype: float64"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.median(axis=0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Flattening a NumPy array"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"cell_id": "f2660896-1b10-4465-a69d-a696a3233338",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 2,
"execution_start": 1643387221160,
"source_hash": "801acc99",
"tags": []
},
"outputs": [],
"source": [
"A = np.array([[2,5,1],[3,1,10]])"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"cell_id": "5fa02e49-4464-484f-9746-6f836b345372",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
21.1875
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 12,
"execution_start": 1643387292722,
"source_hash": "aea5aa59",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"array([ 2, 5, 1, 3, 1, 10])"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"A.reshape((-1))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Slicing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cell_id": "b4c14ac2-fc37-4a58-922a-cd79ce4b5870",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 258,
"execution_start": 1643387327197,
"source_hash": "f804c160",
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.deepnote.dataframe.v3+json": {
"column_count": 5,
"columns": [
{
"dtype": "int64",
"name": "a",
"stats": {
"histogram": [
{
"bin_end": 6.5,
"bin_start": 5,
"count": 1
},
{
"bin_end": 8,
"bin_start": 6.5,
"count": 1
},
{
"bin_end": 9.5,
"bin_start": 8,
"count": 0
},
{
"bin_end": 11,
"bin_start": 9.5,
"count": 0
},
{
"bin_end": 12.5,
"bin_start": 11,
"count": 0
},
{
"bin_end": 14,
"bin_start": 12.5,
"count": 0
},
{
"bin_end": 15.5,
"bin_start": 14,
"count": 0
},
{
"bin_end": 17,
"bin_start": 15.5,
"count": 0
},
{
"bin_end": 18.5,
"bin_start": 17,
"count": 0
},
{
"bin_end": 20,
"bin_start": 18.5,
"count": 1
}
],
"max": "20",
"min": "5",
"nan_count": 0,
"unique_count": 3
}
},
{
"dtype": "int64",
"name": "b",
"stats": {
"histogram": [
{
"bin_end": 9.6,
"bin_start": 9.5,
"count": 0
},
{
"bin_end": 9.7,
"bin_start": 9.6,
"count": 0
},
{
"bin_end": 9.8,
"bin_start": 9.7,
"count": 0
},
{
"bin_end": 9.9,
"bin_start": 9.8,
"count": 0
},
{
"bin_end": 10,
"bin_start": 9.9,
"count": 0
},
{
"bin_end": 10.1,
"bin_start": 10,
"count": 3
},
{
"bin_end": 10.2,
"bin_start": 10.1,
"count": 0
},
{
"bin_end": 10.3,
"bin_start": 10.2,
"count": 0
},
{
"bin_end": 10.4,
"bin_start": 10.3,
"count": 0
},
{
"bin_end": 10.5,
"bin_start": 10.4,
"count": 0
}
],
"max": "10",
"min": "10",
"nan_count": 0,
"unique_count": 1
}
},
{
"dtype": "float64",
"name": "chris",
"stats": {
"histogram": [
{
"bin_end": -15,
"bin_start": -20,
"count": 1
},
{
"bin_end": -10,
"bin_start": -15,
"count": 0
},
{
"bin_end": -5,
"bin_start": -10,
"count": 0
},
{
"bin_end": 0,
"bin_start": -5,
"count": 0
},
{
"bin_end": 5,
"bin_start": 0,
"count": 1
},
{
"bin_end": 10,
"bin_start": 5,
"count": 0
},
{
"bin_end": 15,
"bin_start": 10,
"count": 0
},
{
"bin_end": 20,
"bin_start": 15,
"count": 0
},
{
"bin_end": 25,
"bin_start": 20,
"count": 0
},
{
"bin_end": 30,
"bin_start": 25,
"count": 1
}
],
"max": "30.0",
"min": "-20.0",
"nan_count": 0,
"unique_count": 3
}
},
{
"dtype": "bool",
"name": "new column",
"stats": {
"categories": [
{
"count": 3,
"name": "True"
}
],
"nan_count": 0,
"unique_count": 1
}
},
{
"dtype": "bool",
"name": "new column 2",
"stats": {
"categories": [
{
"count": 3,
"name": "False"
}
],
"nan_count": 0,
"unique_count": 1
}
},
{
"dtype": "int64",
"name": "_deepnote_index_column"
}
],
"row_count": 3,
"rows": [
{
"_deepnote_index_column": 0,
"a": 5,
"b": 10,
"chris": 30,
"new column": true,
"new column 2": false
},
{
"_deepnote_index_column": 1,
"a": 20,
"b": 10,
"chris": 2.4,
"new column": true,
"new column 2": false
},
{
"_deepnote_index_column": 2,
"a": 7,
"b": 10,
"chris": -20,
"new column": true,
"new column 2": false
}
]
},
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" chris | \n",
" new column | \n",
" new column 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5 | \n",
" 10 | \n",
" 30.0 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" 20 | \n",
" 10 | \n",
" 2.4 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" 10 | \n",
" -20.0 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b chris new column new column 2\n",
"0 5 10 30.0 True False\n",
"1 20 10 2.4 True False\n",
"2 7 10 -20.0 True False"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Access every other element in the row with label 1 using slicing"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"cell_id": "aeffc566-d5a5-461d-b4c5-90c360aa683c",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
78.8125
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 2,
"execution_start": 1643387403078,
"source_hash": "fff7ff58",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"a 20\n",
"chris 2.4\n",
"Name: 1, dtype: object"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[1,::2]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Change every other element in the row with label 1 using slicing"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"cell_id": "28b7a6a2-6530-4abc-9ae1-ecb06349cb89",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 4,
"execution_start": 1643387479303,
"source_hash": "b0e70448",
"tags": []
},
"outputs": [
{
"ename": "ValueError",
"evalue": "Must have equal len keys and value when setting with an iterable",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/var/folders/8j/gshrlmtn7dg4qtztj4d4t_w40000gn/T/ipykernel_37915/1568438968.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0;36m2\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/miniconda3/envs/math11/lib/python3.9/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 721\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 722\u001b[0m \u001b[0miloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"iloc\"\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 723\u001b[0;31m \u001b[0miloc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_setitem_with_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 724\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 725\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_validate_key\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/math11/lib/python3.9/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_setitem_with_indexer\u001b[0;34m(self, indexer, value, name)\u001b[0m\n\u001b[1;32m 1728\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtake_split_path\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1729\u001b[0m \u001b[0;31m# We have to operate column-wise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1730\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_setitem_with_indexer_split_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1731\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1732\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_setitem_single_block\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/math11/lib/python3.9/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_setitem_with_indexer_split_path\u001b[0;34m(self, indexer, value, name)\u001b[0m\n\u001b[1;32m 1806\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1807\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1808\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 1809\u001b[0m \u001b[0;34m\"Must have equal len keys and value \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1810\u001b[0m \u001b[0;34m\"when setting with an iterable\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Must have equal len keys and value when setting with an iterable"
]
}
],
"source": [
"df.loc[1,::2] = [i**2 for i in range(3)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Change every other element in the row with label 1 using slicing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cell_id": "bfea788c-addb-45e5-bfc8-cf2a45a3d99c",
"deepnote_cell_type": "code",
"tags": []
},
"outputs": [],
"source": [
"df.loc[1,::2] = [0,1,4]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cell_id": "c353bf34-0a49-4655-962b-e650fb70e997",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 37,
"execution_start": 1643387479310,
"source_hash": "f804c160",
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.deepnote.dataframe.v3+json": {
"column_count": 5,
"columns": [
{
"dtype": "int64",
"name": "a",
"stats": {
"histogram": [
{
"bin_end": 0.7,
"bin_start": 0,
"count": 1
},
{
"bin_end": 1.4,
"bin_start": 0.7,
"count": 0
},
{
"bin_end": 2.0999999999999996,
"bin_start": 1.4,
"count": 0
},
{
"bin_end": 2.8,
"bin_start": 2.0999999999999996,
"count": 0
},
{
"bin_end": 3.5,
"bin_start": 2.8,
"count": 0
},
{
"bin_end": 4.199999999999999,
"bin_start": 3.5,
"count": 0
},
{
"bin_end": 4.8999999999999995,
"bin_start": 4.199999999999999,
"count": 0
},
{
"bin_end": 5.6,
"bin_start": 4.8999999999999995,
"count": 1
},
{
"bin_end": 6.3,
"bin_start": 5.6,
"count": 0
},
{
"bin_end": 7,
"bin_start": 6.3,
"count": 1
}
],
"max": "7",
"min": "0",
"nan_count": 0,
"unique_count": 3
}
},
{
"dtype": "int64",
"name": "b",
"stats": {
"histogram": [
{
"bin_end": 9.6,
"bin_start": 9.5,
"count": 0
},
{
"bin_end": 9.7,
"bin_start": 9.6,
"count": 0
},
{
"bin_end": 9.8,
"bin_start": 9.7,
"count": 0
},
{
"bin_end": 9.9,
"bin_start": 9.8,
"count": 0
},
{
"bin_end": 10,
"bin_start": 9.9,
"count": 0
},
{
"bin_end": 10.1,
"bin_start": 10,
"count": 3
},
{
"bin_end": 10.2,
"bin_start": 10.1,
"count": 0
},
{
"bin_end": 10.3,
"bin_start": 10.2,
"count": 0
},
{
"bin_end": 10.4,
"bin_start": 10.3,
"count": 0
},
{
"bin_end": 10.5,
"bin_start": 10.4,
"count": 0
}
],
"max": "10",
"min": "10",
"nan_count": 0,
"unique_count": 1
}
},
{
"dtype": "float64",
"name": "chris",
"stats": {
"histogram": [
{
"bin_end": -15,
"bin_start": -20,
"count": 1
},
{
"bin_end": -10,
"bin_start": -15,
"count": 0
},
{
"bin_end": -5,
"bin_start": -10,
"count": 0
},
{
"bin_end": 0,
"bin_start": -5,
"count": 0
},
{
"bin_end": 5,
"bin_start": 0,
"count": 1
},
{
"bin_end": 10,
"bin_start": 5,
"count": 0
},
{
"bin_end": 15,
"bin_start": 10,
"count": 0
},
{
"bin_end": 20,
"bin_start": 15,
"count": 0
},
{
"bin_end": 25,
"bin_start": 20,
"count": 0
},
{
"bin_end": 30,
"bin_start": 25,
"count": 1
}
],
"max": "30.0",
"min": "-20.0",
"nan_count": 0,
"unique_count": 3
}
},
{
"dtype": "bool",
"name": "new column",
"stats": {
"categories": [
{
"count": 3,
"name": "True"
}
],
"nan_count": 0,
"unique_count": 1
}
},
{
"dtype": "object",
"name": "new column 2",
"stats": {
"categories": [
{
"count": 2,
"name": "False"
},
{
"count": 1,
"name": "4"
}
],
"nan_count": 0,
"unique_count": 2
}
},
{
"dtype": "int64",
"name": "_deepnote_index_column"
}
],
"row_count": 3,
"rows": [
{
"_deepnote_index_column": 0,
"a": 5,
"b": 10,
"chris": 30,
"new column": true,
"new column 2": "False"
},
{
"_deepnote_index_column": 1,
"a": 0,
"b": 10,
"chris": 1,
"new column": true,
"new column 2": "4"
},
{
"_deepnote_index_column": 2,
"a": 7,
"b": 10,
"chris": -20,
"new column": true,
"new column 2": "False"
}
]
},
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" chris | \n",
" new column | \n",
" new column 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5 | \n",
" 10 | \n",
" 30.0 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" 0 | \n",
" 10 | \n",
" 1.0 | \n",
" True | \n",
" 4 | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" 10 | \n",
" -20.0 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b chris new column new column 2\n",
"0 5 10 30.0 True False\n",
"1 0 10 1.0 True 4\n",
"2 7 10 -20.0 True False"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Square every element in a DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"cell_id": "e5476dc2-8793-4f66-af07-ece0ed9708df",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 15,
"execution_start": 1643387718414,
"source_hash": "8e461c83",
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" chris | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5 | \n",
" 10 | \n",
" 30.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 20 | \n",
" 10 | \n",
" 2.4 | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" 10 | \n",
" -20.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b chris\n",
"0 5 10 30.0\n",
"1 20 10 2.4\n",
"2 7 10 -20.0"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2 = df.iloc[:,:3]\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cell_id": "06a97bc0-7695-4538-ac7a-3ebf0a8684c9",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
78.8125
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 2,
"execution_start": 1643387734047,
"source_hash": "62b9b55b",
"tags": []
},
"outputs": [],
"source": [
"df2.loc[1] = df2.loc[1]**2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cell_id": "ac03692d-e58d-4fe4-8340-9cc125eaf5a3",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 16,
"execution_start": 1643387736160,
"source_hash": "caa55e2e",
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.deepnote.dataframe.v3+json": {
"column_count": 3,
"columns": [
{
"dtype": "float64",
"name": "a",
"stats": {
"histogram": [
{
"bin_end": 0.7,
"bin_start": 0,
"count": 1
},
{
"bin_end": 1.4,
"bin_start": 0.7,
"count": 0
},
{
"bin_end": 2.0999999999999996,
"bin_start": 1.4,
"count": 0
},
{
"bin_end": 2.8,
"bin_start": 2.0999999999999996,
"count": 0
},
{
"bin_end": 3.5,
"bin_start": 2.8,
"count": 0
},
{
"bin_end": 4.199999999999999,
"bin_start": 3.5,
"count": 0
},
{
"bin_end": 4.8999999999999995,
"bin_start": 4.199999999999999,
"count": 0
},
{
"bin_end": 5.6,
"bin_start": 4.8999999999999995,
"count": 1
},
{
"bin_end": 6.3,
"bin_start": 5.6,
"count": 0
},
{
"bin_end": 7,
"bin_start": 6.3,
"count": 1
}
],
"max": "7.0",
"min": "0.0",
"nan_count": 0,
"unique_count": 3
}
},
{
"dtype": "float64",
"name": "b",
"stats": {
"histogram": [
{
"bin_end": 19,
"bin_start": 10,
"count": 2
},
{
"bin_end": 28,
"bin_start": 19,
"count": 0
},
{
"bin_end": 37,
"bin_start": 28,
"count": 0
},
{
"bin_end": 46,
"bin_start": 37,
"count": 0
},
{
"bin_end": 55,
"bin_start": 46,
"count": 0
},
{
"bin_end": 64,
"bin_start": 55,
"count": 0
},
{
"bin_end": 73,
"bin_start": 64,
"count": 0
},
{
"bin_end": 82,
"bin_start": 73,
"count": 0
},
{
"bin_end": 91,
"bin_start": 82,
"count": 0
},
{
"bin_end": 100,
"bin_start": 91,
"count": 1
}
],
"max": "100.0",
"min": "10.0",
"nan_count": 0,
"unique_count": 2
}
},
{
"dtype": "float64",
"name": "chris",
"stats": {
"histogram": [
{
"bin_end": -15,
"bin_start": -20,
"count": 1
},
{
"bin_end": -10,
"bin_start": -15,
"count": 0
},
{
"bin_end": -5,
"bin_start": -10,
"count": 0
},
{
"bin_end": 0,
"bin_start": -5,
"count": 0
},
{
"bin_end": 5,
"bin_start": 0,
"count": 1
},
{
"bin_end": 10,
"bin_start": 5,
"count": 0
},
{
"bin_end": 15,
"bin_start": 10,
"count": 0
},
{
"bin_end": 20,
"bin_start": 15,
"count": 0
},
{
"bin_end": 25,
"bin_start": 20,
"count": 0
},
{
"bin_end": 30,
"bin_start": 25,
"count": 1
}
],
"max": "30.0",
"min": "-20.0",
"nan_count": 0,
"unique_count": 3
}
},
{
"dtype": "int64",
"name": "_deepnote_index_column"
}
],
"row_count": 3,
"rows": [
{
"_deepnote_index_column": 0,
"a": 5,
"b": 10,
"chris": 30
},
{
"_deepnote_index_column": 1,
"a": 0,
"b": 100,
"chris": 1
},
{
"_deepnote_index_column": 2,
"a": 7,
"b": 10,
"chris": -20
}
]
},
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" chris | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5.0 | \n",
" 10.0 | \n",
" 30.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.0 | \n",
" 100.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 7.0 | \n",
" 10.0 | \n",
" -20.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b chris\n",
"0 5.0 10.0 30.0\n",
"1 0.0 100.0 1.0\n",
"2 7.0 10.0 -20.0"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"cell_id": "d1872d4c-3889-466f-804c-3286e6adbb86",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 29,
"execution_start": 1643387760375,
"source_hash": "b5f73824",
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" chris | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 25 | \n",
" 100 | \n",
" 900.00 | \n",
"
\n",
" \n",
" 1 | \n",
" 400 | \n",
" 100 | \n",
" 5.76 | \n",
"
\n",
" \n",
" 2 | \n",
" 49 | \n",
" 100 | \n",
" 400.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b chris\n",
"0 25 100 900.00\n",
"1 400 100 5.76\n",
"2 49 100 400.00"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2**2"
]
},
{
"cell_type": "markdown",
"metadata": {
"cell_id": "fd98728f-abe6-4572-bc6c-d3672b497207",
"deepnote_cell_type": "markdown",
"tags": []
},
"source": [
"Try doing that same thing (squaring every entry in df2) using `map`, `apply`, or `applymap`."
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"cell_id": "355adbf3-c513-439d-8365-45b34a7809ad",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 2,
"execution_start": 1643387920495,
"source_hash": "8061dcae",
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" chris | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 25 | \n",
" 100 | \n",
" 900.00 | \n",
"
\n",
" \n",
" 1 | \n",
" 400 | \n",
" 100 | \n",
" 5.76 | \n",
"
\n",
" \n",
" 2 | \n",
" 49 | \n",
" 100 | \n",
" 400.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b chris\n",
"0 25 100 900.00\n",
"1 400 100 5.76\n",
"2 49 100 400.00"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.applymap(lambda x: x**2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Warning**: notice that `applymap` doesn't change the original DataFrame."
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"cell_id": "7f22f075-8067-4ee7-b007-ba24616f19d5",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 5,
"execution_start": 1643387979938,
"source_hash": "caa55e2e",
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" chris | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5 | \n",
" 10 | \n",
" 30.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 20 | \n",
" 10 | \n",
" 2.4 | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" 10 | \n",
" -20.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b chris\n",
"0 5 10 30.0\n",
"1 20 10 2.4\n",
"2 7 10 -20.0"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "markdown",
"metadata": {
"cell_id": "fe74e8ba-8cec-4220-bfd5-921f9a3c8cfb",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
117.21875
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 17,
"execution_start": 1643388184581,
"source_hash": "5cc90411",
"tags": []
},
"source": [
"## Example using apply"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"cell_id": "6874da94-7aeb-41cf-926c-c2ae40280861",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
78.8125
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 5,
"execution_start": 1643388015610,
"source_hash": "339b2fc5",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"a 32.0\n",
"b 30.0\n",
"chris 12.4\n",
"dtype: float64"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.apply(lambda c: c.sum(), axis = 0)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 45.0\n",
"1 32.4\n",
"2 -3.0\n",
"dtype: float64"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.apply(lambda r: r.sum(), axis = 1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"What if we tried to use `applymap` instead?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cell_id": "d0801cea-4262-4830-b507-29b01d36c6cd",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 26,
"execution_start": 1643388141729,
"source_hash": "adda5cc5",
"tags": []
},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'float' object has no attribute 'sum'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapplymap\u001b[0;34m(self, func, na_action)\u001b[0m\n\u001b[1;32m 7842\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap_infer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_na\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mignore_na\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7843\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 7844\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minfer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"applymap\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7845\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7846\u001b[0m \u001b[0;31m# ----------------------------------------------------------------------\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, raw, result_type, args, **kwds)\u001b[0m\n\u001b[1;32m 7766\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7767\u001b[0m )\n\u001b[0;32m-> 7768\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7769\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7770\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mna_action\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 185\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 186\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 274\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 275\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 276\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mres_index\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 277\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[0;31m# wrap results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0;31m# ignore SettingWithCopy here in case the user mutates\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 290\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 291\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mABCSeries\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0;31m# If we have a view on v, we need to make a copy because\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36minfer\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 7840\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7841\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap_infer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_na\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mignore_na\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 7842\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap_infer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_na\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mignore_na\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7843\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7844\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minfer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"applymap\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/lib.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m(a)\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m: 'float' object has no attribute 'sum'"
]
}
],
"source": [
"df2.applymap(lambda a: a.sum())"
]
},
{
"cell_type": "markdown",
"metadata": {
"cell_id": "8a2f01ac-e621-4f78-a062-397e31c77383",
"deepnote_cell_type": "markdown",
"tags": []
},
"source": [
"Sample exercise: What causes the above error?\n",
"\n",
"Sample answer: `a` will be a number in the dataframe, and `number.sum()` does not make sense. Should use `apply` and `axis` instead."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A smaller piece of code that raises the same error:"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'float' object has no attribute 'sum'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/var/folders/8j/gshrlmtn7dg4qtztj4d4t_w40000gn/T/ipykernel_37915/223743580.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m5.1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0ma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m: 'float' object has no attribute 'sum'"
]
}
],
"source": [
"a = 5.1\n",
"a.sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Of the three methods, `map`, `applymap`, and `apply`, definitely `apply` is the trickiest to understand. The most natural example with `apply` we have seen was using `pd.to_numeric` on every column. Notice how the input to `pd.to_numeric` should be an entire Series, not an individual entry."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Working with datetime entries"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"cell_id": "25702ca0-1d3f-44ab-9232-1307f50f61ea",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 23,
"execution_start": 1643388402781,
"source_hash": "6b2531b2",
"tags": []
},
"outputs": [],
"source": [
"df = pd.read_csv(\"../data/spotify_dataset.csv\", na_values=\" \")"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"cell_id": "5bbb132a-192d-4928-9db1-392a78349626",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
136.421875
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 16,
"execution_start": 1643388407679,
"source_hash": "25d43fa0",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Index', 'Highest Charting Position', 'Number of Times Charted',\n",
" 'Week of Highest Charting', 'Song Name', 'Streams', 'Artist',\n",
" 'Artist Followers', 'Song ID', 'Genre', 'Release Date', 'Weeks Charted',\n",
" 'Popularity', 'Danceability', 'Energy', 'Loudness', 'Speechiness',\n",
" 'Acousticness', 'Liveness', 'Tempo', 'Duration (ms)', 'Valence',\n",
" 'Chord'],\n",
" dtype='object')"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This way of getting a column from a DataFrame is called *attribute* access:"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"cell_id": "6c00faa6-0850-4acc-b4e5-a508ef83467b",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
232.4375
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 5,
"execution_start": 1643388429000,
"source_hash": "9634eaf3",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"0 ['indie rock italiano', 'italian pop']\n",
"1 ['australian hip hop']\n",
"2 ['pop']\n",
"3 ['pop', 'uk pop']\n",
"4 ['lgbtq+ hip hop', 'pop rap']\n",
" ... \n",
"1551 ['dance pop', 'pop', 'uk pop']\n",
"1552 ['sertanejo', 'sertanejo universitario']\n",
"1553 ['dance pop', 'electropop', 'pop', 'post-teen ...\n",
"1554 ['brega funk', 'funk carioca']\n",
"1555 ['pop', 'post-teen pop']\n",
"Name: Genre, Length: 1556, dtype: object"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.Genre"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It doesn't always work. For example, in the following, we can't use attribute access because *Release Date* has a space in it."
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"cell_id": "12da1ee5-195e-42ad-9bbb-3aecc608e1b5",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 5,
"execution_start": 1643388445735,
"source_hash": "c2d0c103",
"tags": []
},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (3791632194.py, line 1)",
"output_type": "error",
"traceback": [
"\u001b[0;36m File \u001b[0;32m\"/var/folders/8j/gshrlmtn7dg4qtztj4d4t_w40000gn/T/ipykernel_37915/3791632194.py\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m df.Release Date\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"df.Release Date"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"cell_id": "ed0f2a84-b0c9-47db-bf5d-0a9e9c5c09ca",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
232.4375
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 5,
"execution_start": 1643388460752,
"source_hash": "9e0b7036",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"0 2017-12-08\n",
"1 2021-07-09\n",
"2 2021-05-21\n",
"3 2021-06-25\n",
"4 2021-07-23\n",
" ... \n",
"1551 2017-06-02\n",
"1552 2019-10-11\n",
"1553 2018-01-12\n",
"1554 2019-09-25\n",
"1555 2019-11-13\n",
"Name: Release Date, Length: 1556, dtype: object"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"Release Date\"]"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"cell_id": "aa45def3-1eec-4144-8913-ee496e4e4bdb",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
232.4375
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 9,
"execution_start": 1643388521349,
"source_hash": "a05f272f",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"0 8.0\n",
"1 9.0\n",
"2 21.0\n",
"3 25.0\n",
"4 23.0\n",
" ... \n",
"1551 2.0\n",
"1552 11.0\n",
"1553 12.0\n",
"1554 25.0\n",
"1555 13.0\n",
"Name: Release Date, Length: 1556, dtype: float64"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.to_datetime(df[\"Release Date\"]).dt.day"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"cell_id": "8dda9b06-7f4d-444d-a9a9-9261982253ba",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
232.4375
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 0,
"execution_start": 1643388602047,
"source_hash": "993d219d",
"tags": []
},
"outputs": [],
"source": [
"year_series = pd.to_datetime(df[\"Release Date\"]).dt.year"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Sample exercise: How many `2019`s are there in `year_series`?"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"cell_id": "35d0392f-2ff8-4fc7-9a39-f808d5927712",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
21.1875
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 3,
"execution_start": 1643388715520,
"source_hash": "6ae1b884",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"181"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(year_series == 2019).sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here we're trying a different method, but it doesn't work at first because of null values. I realized after class that we could have used a keyword argument to `map` called `na_action`, but during class we removed the null values by hand."
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"cell_id": "333509a2-a623-4627-a978-f3dced88082d",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
233
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 30,
"execution_start": 1643388788276,
"source_hash": "ddf125b8",
"tags": []
},
"outputs": [
{
"ename": "TypeError",
"evalue": "'float' object is not subscriptable",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/var/folders/8j/gshrlmtn7dg4qtztj4d4t_w40000gn/T/ipykernel_37915/337003375.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Release Date\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2019\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/miniconda3/envs/math11/lib/python3.9/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36mmap\u001b[0;34m(self, arg, na_action)\u001b[0m\n\u001b[1;32m 4159\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4160\u001b[0m \"\"\"\n\u001b[0;32m-> 4161\u001b[0;31m \u001b[0mnew_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_map_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mna_action\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mna_action\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4162\u001b[0m return self._constructor(new_values, index=self.index).__finalize__(\n\u001b[1;32m 4163\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"map\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/math11/lib/python3.9/site-packages/pandas/core/base.py\u001b[0m in \u001b[0;36m_map_values\u001b[0;34m(self, mapper, na_action)\u001b[0m\n\u001b[1;32m 868\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 869\u001b[0m \u001b[0;31m# mapper is a function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 870\u001b[0;31m \u001b[0mnew_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmap_f\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmapper\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 871\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 872\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnew_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/math11/lib/python3.9/site-packages/pandas/_libs/lib.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m/var/folders/8j/gshrlmtn7dg4qtztj4d4t_w40000gn/T/ipykernel_37915/337003375.py\u001b[0m in \u001b[0;36m\u001b[0;34m(s)\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Release Date\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2019\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m: 'float' object is not subscriptable"
]
}
],
"source": [
"df[\"Release Date\"].map(lambda s: s[:4] == 2019)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cell_id": "752d9dc3-4693-430b-a8f1-fcc167dd82e9",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 11,
"execution_start": 1643388856109,
"source_hash": "47061172",
"tags": []
},
"outputs": [
{
"ename": "TypeError",
"evalue": "'float' object is not subscriptable",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnan\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m: 'float' object is not subscriptable"
]
}
],
"source": [
"np.nan[:4]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"During class I made another mistake before getting to the next cell, but I deleted it from this notebook because it's more confusing than helpful.\n",
"\n",
"Here is an alternate approach."
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"cell_id": "327bc4a8-199a-4fe6-a45a-5bef11ad74de",
"deepnote_cell_type": "code",
"deepnote_to_be_reexecuted": false,
"execution_millis": 2,
"execution_start": 1643389005077,
"source_hash": "d9d052d5",
"tags": []
},
"outputs": [],
"source": [
"clean = df[\"Release Date\"][~df[\"Release Date\"].isna()]"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"cell_id": "f31583c0-a0a2-4463-8cc6-c5d1dbd196f5",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
21.1875
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 5,
"execution_start": 1643389110528,
"source_hash": "9eb0b2f4",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.series.Series"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(clean)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 2017-12-08\n",
"1 2021-07-09\n",
"2 2021-05-21\n",
"3 2021-06-25\n",
"4 2021-07-23\n",
" ... \n",
"1551 2017-06-02\n",
"1552 2019-10-11\n",
"1553 2018-01-12\n",
"1554 2019-09-25\n",
"1555 2019-11-13\n",
"Name: Release Date, Length: 1545, dtype: object"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"clean"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Two different ways to count `2019`s in this pandas Series of strings. Notice that they give the same answer as the `.dt.year` method from above."
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"cell_id": "b3f67f74-26d5-41c0-a9dc-0fc321e62429",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
21.1875
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 5,
"execution_start": 1643389020671,
"source_hash": "87192a9c",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"181"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"clean.map(lambda s: s[:4] == \"2019\").sum()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"cell_id": "821ba94b-c0ac-4122-bdd3-5f1231879659",
"deepnote_cell_type": "code",
"deepnote_output_heights": [
21.1875
],
"deepnote_to_be_reexecuted": false,
"execution_millis": 10,
"execution_start": 1643389057991,
"source_hash": "107a5169",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"181"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"clean.map(lambda s: int(s[:4]) == 2019).sum()"
]
}
],
"metadata": {
"deepnote": {
"is_reactive": false
},
"deepnote_execution_queue": [],
"deepnote_notebook_id": "2cfe386f-b03b-4bf9-8aba-f1d8192309bc",
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}