{ "cells": [ { "cell_type": "markdown", "metadata": { "cell_id": "643991d678104d5e9c55ce94896c4dc5", "deepnote_cell_height": 301.46875, "deepnote_cell_type": "markdown", "tags": [] }, "source": [ "# Week 2 Tuesday Discussion\n", " __Reminders:__ \n", " - Quiz #1 during the last 20 minutes of discussion today\n", " - Homework #1 due __tonight__ by 23:59\n", "\n", " Today \n", " - Any questions about Homework #1?\n", " - Review for Quiz #1\n", "\n", " ***" ] }, { "cell_type": "markdown", "metadata": { "cell_id": "8d2655a8f7c24bf48547e82a74010ccf", "deepnote_cell_height": 174.390625, "deepnote_cell_type": "markdown", "tags": [] }, "source": [ "## Quiz #1 Review\n", "\n", "## Question 1 \n", "\n", "Suppose I have the DataFrame shown below. How could I find the sub-DataFrame where the values in the column \"Hello\" are greater than 30?" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "cell_id": "b9c482c842c0472c97fd902b42901698", "deepnote_cell_height": 636, "deepnote_cell_type": "code", "deepnote_to_be_reexecuted": false, "execution_millis": 12, "execution_start": 1649315308243, "source_hash": "b783082c", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HiHello
0844
14548
21445
31912
4409
5341
64728
7374
83937
9938
\n", "
" ], "text/plain": [ " Hi Hello\n", "0 8 44\n", "1 45 48\n", "2 14 45\n", "3 19 12\n", "4 40 9\n", "5 3 41\n", "6 47 28\n", "7 37 4\n", "8 39 37\n", "9 9 38" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "df = pd.DataFrame({\"Hi\": [8,45,14,19,40,3,47,37,39,9],\"Hello\": [44,48,45,12,9,41,28,4,37,38]})\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cell_id": "7fce8c6cc3a34dd1ab292b35eab6f151", "deepnote_cell_height": 118.1875, "deepnote_cell_type": "code", "deepnote_output_heights": [ 21.1875 ], "deepnote_to_be_reexecuted": false, "execution_millis": 4, "execution_start": 1649315308259, "source_hash": "6a4fefe2", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "['Hi', 'Hello']" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[c for c in df.columns if df.loc[1,c] > 10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cell_id": "b07a9e9db31d4d6693e7122f9012b2e5", "deepnote_cell_height": 310.140625, "deepnote_cell_type": "code", "deepnote_output_heights": [ 213.140625 ], "deepnote_to_be_reexecuted": false, "execution_millis": 12, "execution_start": 1649315308264, "source_hash": "d28166ce", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 True\n", "1 True\n", "2 True\n", "3 False\n", "4 False\n", "5 True\n", "6 False\n", "7 False\n", "8 True\n", "9 True\n", "Name: Hello, dtype: bool" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[\"Hello\"] > 30" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cell_id": "0a61c35651ab41639c7e7cdb50390092", "deepnote_cell_height": 454, "deepnote_cell_type": "code", "deepnote_to_be_reexecuted": false, "execution_millis": 5, "execution_start": 1649315308288, "source_hash": "13e98621", "tags": [] }, "outputs": [ { "data": { "application/vnd.deepnote.dataframe.v3+json": { "column_count": 2, "columns": [ { "dtype": "int64", "name": "Hi", "stats": { "histogram": [ { "bin_end": 7.2, "bin_start": 3, "count": 1 }, { "bin_end": 11.4, "bin_start": 7.2, "count": 2 }, { "bin_end": 15.600000000000001, "bin_start": 11.4, "count": 1 }, { "bin_end": 19.8, "bin_start": 15.600000000000001, "count": 0 }, { "bin_end": 24, "bin_start": 19.8, "count": 0 }, { "bin_end": 28.200000000000003, "bin_start": 24, "count": 0 }, { "bin_end": 32.400000000000006, "bin_start": 28.200000000000003, "count": 0 }, { "bin_end": 36.6, "bin_start": 32.400000000000006, "count": 0 }, { "bin_end": 40.800000000000004, "bin_start": 36.6, "count": 1 }, { "bin_end": 45, "bin_start": 40.800000000000004, "count": 1 } ], "max": "45", "min": "3", "nan_count": 0, "unique_count": 6 } }, { "dtype": "int64", "name": "Hello", "stats": { "histogram": [ { "bin_end": 38.1, "bin_start": 37, "count": 2 }, { "bin_end": 39.2, "bin_start": 38.1, "count": 0 }, { "bin_end": 40.3, "bin_start": 39.2, "count": 0 }, { "bin_end": 41.4, "bin_start": 40.3, "count": 1 }, { "bin_end": 42.5, "bin_start": 41.4, "count": 0 }, { "bin_end": 43.6, "bin_start": 42.5, "count": 0 }, { "bin_end": 44.7, "bin_start": 43.6, "count": 1 }, { "bin_end": 45.8, "bin_start": 44.7, "count": 1 }, { "bin_end": 46.9, "bin_start": 45.8, "count": 0 }, { "bin_end": 48, "bin_start": 46.9, "count": 1 } ], "max": "48", "min": "37", "nan_count": 0, "unique_count": 6 } }, { "dtype": "int64", "name": "_deepnote_index_column" } ], "row_count": 6, "rows": [ { "Hello": 44, "Hi": 8, "_deepnote_index_column": 0 }, { "Hello": 48, "Hi": 45, "_deepnote_index_column": 1 }, { "Hello": 45, "Hi": 14, "_deepnote_index_column": 2 }, { "Hello": 41, "Hi": 3, "_deepnote_index_column": 5 }, { "Hello": 37, "Hi": 39, "_deepnote_index_column": 8 }, { "Hello": 38, "Hi": 9, "_deepnote_index_column": 9 } ] }, "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HiHello
0844
14548
21445
5341
83937
9938
\n", "
" ], "text/plain": [ " Hi Hello\n", "0 8 44\n", "1 45 48\n", "2 14 45\n", "5 3 41\n", "8 39 37\n", "9 9 38" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = df[df[\"Hello\"] > 30]\n", "df2" ] }, { "cell_type": "markdown", "metadata": { "cell_id": "b89502da85374b63bba748d4a1420786", "deepnote_cell_height": 130.796875, "deepnote_cell_type": "markdown", "tags": [] }, "source": [ "## Question 2 \n", "\n", "Briefly describe the difference between `iloc` and `loc`. Then, using list comprehension, find all the rows in `df` where the corresponding value in the column \"Hi\" is greater than 10 using `iloc` and then again using `loc`.\n" ] }, { "cell_type": "markdown", "metadata": { "cell_id": "6688bb06a20c4c3aa8a4dad2dadb3b7e", "deepnote_cell_height": 74.796875, "deepnote_cell_type": "markdown", "tags": [] }, "source": [ "`iloc` is used for integer-based indexing (starts at 0); you can remember this by thinking that \"i\" stands for \"integer\".\n", "`loc` is used for label-based indexing." ] }, { "cell_type": "markdown", "metadata": { "cell_id": "a5ececac1bb74324873c5b0427f820e0", "deepnote_cell_height": 74.796875, "deepnote_cell_type": "markdown", "tags": [] }, "source": [ "`iloc` is used for integer-based indexing (starting at 0), while `loc` is label-based indexing. The solution to the second half of this question gives an example of how both can work for the same question." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "cell_id": "07fdf0b097df4c20a34ccde317d866a2", "deepnote_cell_height": 118.1875, "deepnote_cell_type": "code", "deepnote_output_heights": [ 21.1875 ], "deepnote_to_be_reexecuted": false, "execution_millis": 4, "execution_start": 1649315308295, "source_hash": "80a5e66d", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "[1, 2, 3, 4, 6, 7, 8]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[r for r in df.index if df.loc[r,\"Hi\"] > 10]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "cell_id": "b636a148501f46f5a65b75619216f527", "deepnote_cell_height": 118.1875, "deepnote_cell_type": "code", "deepnote_output_heights": [ 21.1875 ], "deepnote_to_be_reexecuted": false, "execution_millis": 3, "execution_start": 1649315308302, "source_hash": "98d4778b", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "[1, 2, 3, 4, 6, 7, 8]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[r for r in range(len(df)) if df.iloc[r,0] > 10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cell_id": "b2d506d4e466439180d966fa6cf65e33", "deepnote_cell_height": 136.1875, "deepnote_cell_type": "code", "deepnote_output_heights": [ 21.1875 ], "deepnote_to_be_reexecuted": false, "execution_millis": 3, "execution_start": 1649315308325, "source_hash": "a5cef1b2", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "(10, 2)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#tuple \n", "df.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cell_id": "bdd8477717894d14a42a2ffce852e316", "deepnote_cell_height": 118.1875, "deepnote_cell_type": "code", "deepnote_output_heights": [ 21.1875 ], "deepnote_to_be_reexecuted": false, "execution_millis": 2, "execution_start": 1649315308326, "source_hash": "651ad380", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "RangeIndex(start=0, stop=10, step=1)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.index" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cell_id": "ee891a40253b4703a6ad1b4df196c592", "deepnote_cell_height": 118.1875, "deepnote_cell_type": "code", "deepnote_output_heights": [ 21.1875 ], "deepnote_to_be_reexecuted": false, "execution_millis": 51, "execution_start": 1649315555740, "owner_user_id": "e590e996-b52f-4e48-8e79-35525bbb42f6", "source_hash": "e6921796", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "[1, 2, 3, 4, 6, 7, 8]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[r for r in df.index if df.loc[r,\"Hi\"] > 10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cell_id": "6bbd3123a5304836ae39c3919d69bf53", "deepnote_cell_height": 118.1875, "deepnote_cell_type": "code", "deepnote_output_heights": [ 21.1875 ], "deepnote_to_be_reexecuted": false, "execution_millis": 7, "execution_start": 1649315308326, "source_hash": "783ae11d", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "10" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape[0]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "cell_id": "106872d61e04481f97dbdae0d34e2394", "deepnote_cell_height": 118.1875, "deepnote_cell_type": "code", "deepnote_output_heights": [ 21.1875 ], "deepnote_to_be_reexecuted": false, "execution_millis": 8, "execution_start": 1649315308329, "source_hash": "98d4778b", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "[1, 2, 3, 4, 6, 7, 8]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[r for r in range(len(df)) if df.iloc[r,0] > 10]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "cell_id": "4d866cf6a43c46538c86af6645cadeaa", "deepnote_cell_height": 118.1875, "deepnote_cell_type": "code", "deepnote_output_heights": [ 21.1875 ], "deepnote_to_be_reexecuted": false, "execution_millis": 8, "execution_start": 1649315308336, "source_hash": "bd6ec34e", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "[1, 2, 3, 4, 6, 7, 8]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[r for r in range(len(df)) if df.iloc[r,0] > 10]" ] }, { "cell_type": "markdown", "metadata": { "cell_id": "561af443f3bf4829a25dd421ca5ea5b3", "deepnote_cell_height": 108.390625, "deepnote_cell_type": "markdown", "tags": [] }, "source": [ "## Question 3 \n", "\n", "How could you make the following output using a for-loop and f-strings?" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cell_id": "6a131c90a79a41cfbb79b81555e731d1", "deepnote_cell_height": 184.265625, "deepnote_cell_type": "code", "deepnote_to_be_reexecuted": false, "execution_millis": 5, "execution_start": 1649315308374, "is_code_hidden": true, "is_output_hidden": false, "owner_user_id": "02be19f8-8497-4212-b8d0-46ca9f1d48b9", "source_hash": "f7631c21", "tags": [ "hide-input" ] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The square of 0 is 0.\n", "The square of 3 is 9.\n", "The square of 6 is 36.\n", "The square of 9 is 81.\n", "The square of 12 is 144.\n" ] } ], "source": [ "for i in range(0,13,3):\n", " print(f\"The square of {i} is {i**2}.\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "allow_embed": false, "cell_id": "bbbd2c9ffa044b8999d09f919f500705", "deepnote_cell_height": 208.46875, "deepnote_cell_type": "code", "deepnote_to_be_reexecuted": false, "execution_millis": 1, "execution_start": 1649315308397, "source_hash": "f7631c21", "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The square of 0 is 0.\n", "The square of 3 is 9.\n", "The square of 6 is 36.\n", "The square of 9 is 81.\n", "The square of 12 is 144.\n" ] } ], "source": [ "for i in range(0,13,3):\n", " print(f\"The square of {i} is {i**2}.\")" ] } ], "metadata": { "deepnote": { "is_reactive": false }, "deepnote_execution_queue": [], "deepnote_notebook_id": "cd919db0-9727-41a8-a4ed-a1cd28f6b1b7", "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.12" } }, "nbformat": 4, "nbformat_minor": 4 }