diff --git a/.DS_Store b/.DS_Store index 99ad0b6ca..6243d71c4 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/01_Getting_&_Knowing_Your_Data/.DS_Store b/01_Getting_&_Knowing_Your_Data/.DS_Store new file mode 100644 index 000000000..563e969d2 Binary files /dev/null and b/01_Getting_&_Knowing_Your_Data/.DS_Store differ diff --git a/01_Getting_&_Knowing_Your_Data/Chipotle/Exercise_with_Solutions.ipynb b/01_Getting_&_Knowing_Your_Data/Chipotle/Exercise_with_Solutions.ipynb index 32d1e7df8..95254501d 100644 --- a/01_Getting_&_Knowing_Your_Data/Chipotle/Exercise_with_Solutions.ipynb +++ b/01_Getting_&_Knowing_Your_Data/Chipotle/Exercise_with_Solutions.ipynb @@ -14,35 +14,21 @@ "metadata": {}, "source": [ "This time we are going to pull data directly from the internet.\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called chipo." + "### Assign it to a variable called chipo." ] }, { @@ -62,7 +48,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. See the first 10 entries" + "### See the first 10 entries" ] }, { @@ -212,7 +198,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. 
What is the number of observations in the dataset?" + "### What is the number of observations in the dataset?" ] }, { @@ -273,7 +259,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. What is the number of columns in the dataset?" + "### What is the number of columns in the dataset?" ] }, { @@ -302,7 +288,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Print the name of all the columns." + "### Print the name of all the columns." ] }, { @@ -333,7 +319,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. How is the dataset indexed?" + "### How is the dataset indexed?" ] }, { @@ -362,7 +348,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Which was the most-ordered item? " + "### Which was the most-ordered item? " ] }, { @@ -421,7 +407,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. For the most-ordered item, how many items were ordered?" + "### For the most-ordered item, how many items were ordered?" ] }, { @@ -480,7 +466,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. What was the most ordered item in the choice_description column?" + "### What was the most ordered item in the choice_description column?" ] }, { @@ -539,7 +525,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. How many items were orderd in total?" + "### How many items were orderd in total?" ] }, { @@ -569,14 +555,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Turn the item price into a float" + "### Turn the item price into a float" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Step 13.a. Check the item price type" + "####a. Check the item price type" ] }, { @@ -605,7 +591,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Step 13.b. Create a lambda function and change the type of item price" + "####b. 
Create a lambda function and change the type of item price" ] }, { @@ -624,7 +610,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Step 13.c. Check the item price type" + "####c. Check the item price type" ] }, { @@ -653,7 +639,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. How much was the revenue for the period in the dataset?" + "### How much was the revenue for the period in the dataset?" ] }, { @@ -681,7 +667,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 15. How many orders were made in the period?" + "### How many orders were made in the period?" ] }, { @@ -711,7 +697,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 16. What is the average revenue amount per order?" + "### What is the average revenue amount per order?" ] }, { @@ -768,7 +754,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 17. How many different items are sold?" + "### How many different items are sold?" ] }, { @@ -795,23 +781,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb b/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb index 03e1fc603..793d4f897 100644 --- a/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb +++ b/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb @@ -12,37 +12,25 @@ "metadata": {}, "source": [ "This time we are going to pull data directly from the internet.\n", - "Special thanks to: 
https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called chipo." + "### Assign it to a variable called chipo." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -53,12 +41,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. See the first 10 entries" + "### See the first 10 entries" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false, "scrolled": false @@ -70,7 +57,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. What is the number of observations in the dataset?" + "### What is the number of observations in the dataset?" ] }, { @@ -101,12 +88,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. What is the number of columns in the dataset?" + "### What is the number of columns in the dataset?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -117,12 +103,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Print the name of all the columns." + "### Print the name of all the columns." 
] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -133,12 +118,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. How is the dataset indexed?" + "### How is the dataset indexed?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -149,12 +133,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Which was the most-ordered item? " + "### Which was the most-ordered item? " ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -165,12 +148,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. For the most-ordered item, how many items were ordered?" + "### For the most-ordered item, how many items were ordered?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -181,12 +163,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. What was the most ordered item in the choice_description column?" + "### What was the most ordered item in the choice_description column?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -197,12 +178,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. How many items were orderd in total?" + "### How many items were orderd in total?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -213,19 +193,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Turn the item price into a float" + "### Turn the item price into a float" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Step 13.a. Check the item price type" + "####a. Check the item price type" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -236,12 +215,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Step 13.b. 
Create a lambda function and change the type of item price" + "####b. Create a lambda function and change the type of item price" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -252,12 +230,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Step 13.c. Check the item price type" + "####c. Check the item price type" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -268,12 +245,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. How much was the revenue for the period in the dataset?" + "### How much was the revenue for the period in the dataset?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -284,12 +260,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 15. How many orders were made in the period?" + "### How many orders were made in the period?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -300,7 +275,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 16. What is the average revenue amount per order?" + "### What is the average revenue amount per order?" ] }, { @@ -331,12 +306,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 17. How many different items are sold?" + "### How many different items are sold?" 
] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -345,23 +319,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/01_Getting_&_Knowing_Your_Data/Chipotle/Solutions.ipynb b/01_Getting_&_Knowing_Your_Data/Chipotle/Solutions.ipynb index b497173bf..74f3568e2 100644 --- a/01_Getting_&_Knowing_Your_Data/Chipotle/Solutions.ipynb +++ b/01_Getting_&_Knowing_Your_Data/Chipotle/Solutions.ipynb @@ -14,32 +14,21 @@ "metadata": {}, "source": [ "This time we are going to pull data directly from the internet.\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called chipo." + "### Assign it to a variable called chipo." ] }, { @@ -55,7 +44,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. 
See the first 10 entries" + "### See the first 10 entries" ] }, { @@ -203,7 +192,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. What is the number of observations in the dataset?" + "### What is the number of observations in the dataset?" ] }, { @@ -262,7 +251,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. What is the number of columns in the dataset?" + "### What is the number of columns in the dataset?" ] }, { @@ -289,7 +278,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Print the name of all the columns." + "### Print the name of all the columns." ] }, { @@ -318,7 +307,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. How is the dataset indexed?" + "### How is the dataset indexed?" ] }, { @@ -345,7 +334,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Which was the most-ordered item? " + "### Which was the most-ordered item? " ] }, { @@ -399,7 +388,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. For the most-ordered item, how many items were ordered?" + "### For the most-ordered item, how many items were ordered?" ] }, { @@ -453,7 +442,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. What was the most ordered item in the choice_description column?" + "### What was the most ordered item in the choice_description column?" ] }, { @@ -507,7 +496,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. How many items were orderd in total?" + "### How many items were orderd in total?" ] }, { @@ -534,14 +523,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Turn the item price into a float" + "### Turn the item price into a float" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Step 13.a. Check the item price type" + "####a. 
Check the item price type" ] }, { @@ -568,7 +557,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Step 13.b. Create a lambda function and change the type of item price" + "####b. Create a lambda function and change the type of item price" ] }, { @@ -584,7 +573,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Step 13.c. Check the item price type" + "####c. Check the item price type" ] }, { @@ -611,7 +600,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. How much was the revenue for the period in the dataset?" + "### How much was the revenue for the period in the dataset?" ] }, { @@ -635,7 +624,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 15. How many orders were made in the period?" + "### How many orders were made in the period?" ] }, { @@ -662,7 +651,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 16. What is the average revenue amount per order?" + "### What is the average revenue amount per order?" ] }, { @@ -715,7 +704,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 17. How many different items are sold?" + "### How many different items are sold?" 
] }, { @@ -740,23 +729,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/01_Getting_&_Knowing_Your_Data/Occupation/Exercise_with_Solution.ipynb b/01_Getting_&_Knowing_Your_Data/Occupation/Exercise_with_Solution.ipynb index caf683c83..2dad605f2 100644 --- a/01_Getting_&_Knowing_Your_Data/Occupation/Exercise_with_Solution.ipynb +++ b/01_Getting_&_Knowing_Your_Data/Occupation/Exercise_with_Solution.ipynb @@ -14,32 +14,21 @@ "metadata": {}, "source": [ "This time we are going to pull data directly from the internet.\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. 
Assign it to a variable called users and use the 'user_id' as index" + "### Assign it to a variable called users and use the 'user_id' as index" ] }, { @@ -56,7 +45,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. See the first 25 entries" + "### See the first 25 entries" ] }, { @@ -323,7 +312,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. See the last 10 entries" + "### See the last 10 entries" ] }, { @@ -470,7 +459,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. What is the number of observations in the dataset?" + "### What is the number of observations in the dataset?" ] }, { @@ -497,7 +486,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. What is the number of columns in the dataset?" + "### What is the number of columns in the dataset?" ] }, { @@ -524,7 +513,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Print the name of all the columns." + "### Print the name of all the columns." ] }, { @@ -551,7 +540,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. How is the dataset indexed?" + "### How is the dataset indexed?" ] }, { @@ -582,7 +571,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. What is the data type of each column?" + "### What is the data type of each column?" ] }, { @@ -613,7 +602,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Print only the occupation column" + "### Print only the occupation column" ] }, { @@ -706,7 +695,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. How many different occupations are in this dataset?" + "### How many different occupations are in this dataset?" ] }, { @@ -735,7 +724,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. What is the most frequent occupation?" + "### What is the most frequent occupation?" 
] }, { @@ -768,7 +757,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. Summarize the DataFrame." + "### Summarize the DataFrame." ] }, { @@ -862,7 +851,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 15. Summarize all the columns" + "### Summarize all the columns" ] }, { @@ -1009,7 +998,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 16. Summarize only the occupation column" + "### Summarize only the occupation column" ] }, { @@ -1040,7 +1029,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 17. What is the mean age of users?" + "### What is the mean age of users?" ] }, { @@ -1067,7 +1056,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 18. What is the age with least occurrence?" + "### What is the age with least occurrence?" ] }, { @@ -1097,9 +1086,8 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1113,7 +1101,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/01_Getting_&_Knowing_Your_Data/Occupation/Exercises.ipynb b/01_Getting_&_Knowing_Your_Data/Occupation/Exercises.ipynb index f32d9ce9f..e4d67aeef 100644 --- a/01_Getting_&_Knowing_Your_Data/Occupation/Exercises.ipynb +++ b/01_Getting_&_Knowing_Your_Data/Occupation/Exercises.ipynb @@ -12,37 +12,25 @@ "metadata": {}, "source": [ "This time we are going to pull data directly from the internet.\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." 
] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called users and use the 'user_id' as index" + "### Assign it to a variable called users and use the 'user_id' as index" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -53,12 +41,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. See the first 25 entries" + "### See the first 25 entries" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false, "scrolled": true @@ -70,12 +57,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. See the last 10 entries" + "### See the last 10 entries" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false, "scrolled": true @@ -87,12 +73,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. What is the number of observations in the dataset?" + "### What is the number of observations in the dataset?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -103,12 +88,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. What is the number of columns in the dataset?" + "### What is the number of columns in the dataset?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -119,12 +103,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Print the name of all the columns." + "### Print the name of all the columns." 
] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -135,12 +118,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. How is the dataset indexed?" + "### How is the dataset indexed?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -151,12 +133,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. What is the data type of each column?" + "### What is the data type of each column?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -167,12 +148,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Print only the occupation column" + "### Print only the occupation column" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -183,12 +163,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. How many different occupations are in this dataset?" + "### How many different occupations are in this dataset?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -199,12 +178,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. What is the most frequent occupation?" + "### What is the most frequent occupation?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -215,12 +193,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. Summarize the DataFrame." + "### Summarize the DataFrame." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -231,12 +208,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 15. Summarize all the columns" + "### Summarize all the columns" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -247,12 +223,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 16. 
Summarize only the occupation column" + "### Summarize only the occupation column" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -263,12 +238,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 17. What is the mean age of users?" + "### What is the mean age of users?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -279,12 +253,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 18. What is the age with least occurrence?" + "### What is the age with least occurrence?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -293,23 +266,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/01_Getting_&_Knowing_Your_Data/Occupation/Solutions.ipynb b/01_Getting_&_Knowing_Your_Data/Occupation/Solutions.ipynb index 345cb8f83..9cae542f8 100644 --- a/01_Getting_&_Knowing_Your_Data/Occupation/Solutions.ipynb +++ b/01_Getting_&_Knowing_Your_Data/Occupation/Solutions.ipynb @@ -14,30 +14,21 @@ "metadata": {}, "source": [ "This time we are going to pull data directly from the internet.\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." 
] }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called users and use the 'user_id' as index" + "### Assign it to a variable called users and use the 'user_id' as index" ] }, { @@ -51,7 +42,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. See the first 25 entries" + "### See the first 25 entries" ] }, { @@ -316,7 +307,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. See the last 10 entries" + "### See the last 10 entries" ] }, { @@ -461,7 +452,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. What is the number of observations in the dataset?" + "### What is the number of observations in the dataset?" ] }, { @@ -486,7 +477,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. What is the number of columns in the dataset?" + "### What is the number of columns in the dataset?" ] }, { @@ -511,7 +502,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Print the name of all the columns." + "### Print the name of all the columns." ] }, { @@ -536,7 +527,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. How is the dataset indexed?" + "### How is the dataset indexed?" ] }, { @@ -566,7 +557,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. What is the data type of each column?" + "### What is the data type of each column?" ] }, { @@ -595,7 +586,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. 
Print only the occupation column" + "### Print only the occupation column" ] }, { @@ -682,7 +673,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. How many different occupations are in this dataset?" + "### How many different occupations are in this dataset?" ] }, { @@ -707,7 +698,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. What is the most frequent occupation?" + "### What is the most frequent occupation?" ] }, { @@ -732,7 +723,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. Summarize the DataFrame." + "### Summarize the DataFrame." ] }, { @@ -824,7 +815,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 15. Summarize all the columns" + "### Summarize all the columns" ] }, { @@ -969,7 +960,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 16. Summarize only the occupation column" + "### Summarize only the occupation column" ] }, { @@ -998,7 +989,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 17. What is the mean age of users?" + "### What is the mean age of users?" ] }, { @@ -1023,7 +1014,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 18. What is the age with least occurrence?" + "### What is the age with least occurrence?" 
] }, { @@ -1051,9 +1042,8 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1067,7 +1057,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises.ipynb b/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises.ipynb deleted file mode 100644 index ea507fa1d..000000000 --- a/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises.ipynb +++ /dev/null @@ -1,191 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Exercise 1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 1. Go to https://www.kaggle.com/openfoodfacts/world-food-facts/data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 2. Download the dataset to your computer and unzip it." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 3. Use the tsv file and assign it to a dataframe called food" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 4. See the first 5 entries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 5. What is the number of observations in the dataset?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 6. What is the number of columns in the dataset?" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 7. Print the name of all the columns." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 8. What is the name of 105th column?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 9. What is the type of the observations of the 105th column?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 10. How is the dataset indexed?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 11. What is the product name of the 19th observation?" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python [default]", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises_with_solutions.ipynb b/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises_with_solutions.ipynb deleted file mode 100644 index 213c4e859..000000000 --- a/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises_with_solutions.ipynb +++ /dev/null @@ -1,555 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Ex1 - Getting and knowing your Data\n", - "Check out [World Food Facts Exercises Video Tutorial](https://youtu.be/_jCSK4cMcVw) to watch a data scientist go through the exercises" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 1. Go to https://www.kaggle.com/openfoodfacts/world-food-facts/data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 2. Download the dataset to your computer and unzip it." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 3. 
Use the tsv file and assign it to a dataframe called food" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "//anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (0,3,5,19,20,24,25,26,27,28,36,37,38,39,48) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " interactivity=interactivity, compiler=compiler, result=result)\n" - ] - } - ], - "source": [ - "food = pd.read_csv('~/Desktop/en.openfoodfacts.org.products.tsv', sep='\\t')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 4. See the first 5 entries" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
codeurlcreatorcreated_tcreated_datetimelast_modified_tlast_modified_datetimeproduct_namegeneric_namequantity...fruits-vegetables-nuts_100gfruits-vegetables-nuts-estimate_100gcollagen-meat-protein-ratio_100gcocoa_100gchlorophyl_100gcarbon-footprint_100gnutrition-score-fr_100gnutrition-score-uk_100gglycemic-index_100gwater-hardness_100g
03087http://world-en.openfoodfacts.org/product/0000...openfoodfacts-contributors14741038662016-09-17T09:17:46Z14741038932016-09-17T09:18:13ZFarine de blé noirNaN1kg...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
14530http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890699572017-03-09T14:32:37Z14890699572017-03-09T14:32:37ZBanana Chips Sweetened (Whole)NaNNaN...NaNNaNNaNNaNNaNNaN14.014.0NaNNaN
24559http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890699572017-03-09T14:32:37Z14890699572017-03-09T14:32:37ZPeanutsNaNNaN...NaNNaNNaNNaNNaNNaN0.00.0NaNNaN
316087http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890557312017-03-09T10:35:31Z14890557312017-03-09T10:35:31ZOrganic Salted Nut MixNaNNaN...NaNNaNNaNNaNNaNNaN12.012.0NaNNaN
416094http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890556532017-03-09T10:34:13Z14890556532017-03-09T10:34:13ZOrganic PolentaNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", - "

5 rows × 163 columns

\n", - "
" - ], - "text/plain": [ - " code url \\\n", - "0 3087 http://world-en.openfoodfacts.org/product/0000... \n", - "1 4530 http://world-en.openfoodfacts.org/product/0000... \n", - "2 4559 http://world-en.openfoodfacts.org/product/0000... \n", - "3 16087 http://world-en.openfoodfacts.org/product/0000... \n", - "4 16094 http://world-en.openfoodfacts.org/product/0000... \n", - "\n", - " creator created_t created_datetime \\\n", - "0 openfoodfacts-contributors 1474103866 2016-09-17T09:17:46Z \n", - "1 usda-ndb-import 1489069957 2017-03-09T14:32:37Z \n", - "2 usda-ndb-import 1489069957 2017-03-09T14:32:37Z \n", - "3 usda-ndb-import 1489055731 2017-03-09T10:35:31Z \n", - "4 usda-ndb-import 1489055653 2017-03-09T10:34:13Z \n", - "\n", - " last_modified_t last_modified_datetime product_name \\\n", - "0 1474103893 2016-09-17T09:18:13Z Farine de blé noir \n", - "1 1489069957 2017-03-09T14:32:37Z Banana Chips Sweetened (Whole) \n", - "2 1489069957 2017-03-09T14:32:37Z Peanuts \n", - "3 1489055731 2017-03-09T10:35:31Z Organic Salted Nut Mix \n", - "4 1489055653 2017-03-09T10:34:13Z Organic Polenta \n", - "\n", - " generic_name quantity ... fruits-vegetables-nuts_100g \\\n", - "0 NaN 1kg ... NaN \n", - "1 NaN NaN ... NaN \n", - "2 NaN NaN ... NaN \n", - "3 NaN NaN ... NaN \n", - "4 NaN NaN ... 
NaN \n", - "\n", - " fruits-vegetables-nuts-estimate_100g collagen-meat-protein-ratio_100g \\\n", - "0 NaN NaN \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " cocoa_100g chlorophyl_100g carbon-footprint_100g nutrition-score-fr_100g \\\n", - "0 NaN NaN NaN NaN \n", - "1 NaN NaN NaN 14.0 \n", - "2 NaN NaN NaN 0.0 \n", - "3 NaN NaN NaN 12.0 \n", - "4 NaN NaN NaN NaN \n", - "\n", - " nutrition-score-uk_100g glycemic-index_100g water-hardness_100g \n", - "0 NaN NaN NaN \n", - "1 14.0 NaN NaN \n", - "2 0.0 NaN NaN \n", - "3 12.0 NaN NaN \n", - "4 NaN NaN NaN \n", - "\n", - "[5 rows x 163 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "food.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 5. What is the number of observations in the dataset?" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(356027, 163)" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "food.shape #will give you both (observations/rows, columns)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "356027" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "food.shape[0] #will give you only the observations/rows number" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 6. What is the number of columns in the dataset?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(356027, 163)\n", - "163\n", - "\n", - "RangeIndex: 356027 entries, 0 to 356026\n", - "Columns: 163 entries, code to water-hardness_100g\n", - "dtypes: float64(107), object(56)\n", - "memory usage: 442.8+ MB\n" - ] - } - ], - "source": [ - "print(food.shape) #will give you both (observations/rows, columns)\n", - "print(food.shape[1]) #will give you only the columns number\n", - "\n", - "#OR\n", - "\n", - "food.info() #Columns: 163 entries" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 7. Print the name of all the columns." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index([u'code', u'url', u'creator', u'created_t', u'created_datetime',\n", - " u'last_modified_t', u'last_modified_datetime', u'product_name',\n", - " u'generic_name', u'quantity',\n", - " ...\n", - " u'fruits-vegetables-nuts_100g', u'fruits-vegetables-nuts-estimate_100g',\n", - " u'collagen-meat-protein-ratio_100g', u'cocoa_100g', u'chlorophyl_100g',\n", - " u'carbon-footprint_100g', u'nutrition-score-fr_100g',\n", - " u'nutrition-score-uk_100g', u'glycemic-index_100g',\n", - " u'water-hardness_100g'],\n", - " dtype='object', length=163)" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "food.columns" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 8. What is the name of 105th column?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'-glucose_100g'" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "food.columns[104]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 9. What is the type of the observations of the 105th column?" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dtype('float64')" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "food.dtypes['-glucose_100g']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 10. How is the dataset indexed?" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "RangeIndex(start=0, stop=356027, step=1)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "food.index" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 11. What is the product name of the 19th observation?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Lotus Organic Brown Jasmine Rice'" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "food.values[18][7]" - ] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/01_Getting_&_Knowing_Your_Data/World Food Facts/Solutions.ipynb b/01_Getting_&_Knowing_Your_Data/World Food Facts/Solutions.ipynb deleted file mode 100644 index e51bf13c4..000000000 --- a/01_Getting_&_Knowing_Your_Data/World Food Facts/Solutions.ipynb +++ /dev/null @@ -1,546 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Ex1 - Getting and knowing your Data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 1. Go to https://www.kaggle.com/openfoodfacts/world-food-facts/data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 2. Download the dataset to your computer and unzip it." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 3. 
Use the tsv file and assign it to a dataframe called food" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "//anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2723: DtypeWarning: Columns (0,3,5,27,36) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " interactivity=interactivity, compiler=compiler, result=result)\n" - ] - } - ], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 4. See the first 5 entries" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
codeurlcreatorcreated_tcreated_datetimelast_modified_tlast_modified_datetimeproduct_namegeneric_namequantity...fruits-vegetables-nuts_100gfruits-vegetables-nuts-estimate_100gcollagen-meat-protein-ratio_100gcocoa_100gchlorophyl_100gcarbon-footprint_100gnutrition-score-fr_100gnutrition-score-uk_100gglycemic-index_100gwater-hardness_100g
03087http://world-en.openfoodfacts.org/product/0000...openfoodfacts-contributors14741038662016-09-17T09:17:46Z14741038932016-09-17T09:18:13ZFarine de blé noirNaN1kg...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
14530http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890699572017-03-09T14:32:37Z14890699572017-03-09T14:32:37ZBanana Chips Sweetened (Whole)NaNNaN...NaNNaNNaNNaNNaNNaN14.014.0NaNNaN
24559http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890699572017-03-09T14:32:37Z14890699572017-03-09T14:32:37ZPeanutsNaNNaN...NaNNaNNaNNaNNaNNaN0.00.0NaNNaN
316087http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890557312017-03-09T10:35:31Z14890557312017-03-09T10:35:31ZOrganic Salted Nut MixNaNNaN...NaNNaNNaNNaNNaNNaN12.012.0NaNNaN
416094http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890556532017-03-09T10:34:13Z14890556532017-03-09T10:34:13ZOrganic PolentaNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", - "

5 rows × 163 columns

\n", - "
" - ], - "text/plain": [ - " code url \\\n", - "0 3087 http://world-en.openfoodfacts.org/product/0000... \n", - "1 4530 http://world-en.openfoodfacts.org/product/0000... \n", - "2 4559 http://world-en.openfoodfacts.org/product/0000... \n", - "3 16087 http://world-en.openfoodfacts.org/product/0000... \n", - "4 16094 http://world-en.openfoodfacts.org/product/0000... \n", - "\n", - " creator created_t created_datetime \\\n", - "0 openfoodfacts-contributors 1474103866 2016-09-17T09:17:46Z \n", - "1 usda-ndb-import 1489069957 2017-03-09T14:32:37Z \n", - "2 usda-ndb-import 1489069957 2017-03-09T14:32:37Z \n", - "3 usda-ndb-import 1489055731 2017-03-09T10:35:31Z \n", - "4 usda-ndb-import 1489055653 2017-03-09T10:34:13Z \n", - "\n", - " last_modified_t last_modified_datetime product_name \\\n", - "0 1474103893 2016-09-17T09:18:13Z Farine de blé noir \n", - "1 1489069957 2017-03-09T14:32:37Z Banana Chips Sweetened (Whole) \n", - "2 1489069957 2017-03-09T14:32:37Z Peanuts \n", - "3 1489055731 2017-03-09T10:35:31Z Organic Salted Nut Mix \n", - "4 1489055653 2017-03-09T10:34:13Z Organic Polenta \n", - "\n", - " generic_name quantity ... fruits-vegetables-nuts_100g \\\n", - "0 NaN 1kg ... NaN \n", - "1 NaN NaN ... NaN \n", - "2 NaN NaN ... NaN \n", - "3 NaN NaN ... NaN \n", - "4 NaN NaN ... 
NaN \n", - "\n", - " fruits-vegetables-nuts-estimate_100g collagen-meat-protein-ratio_100g \\\n", - "0 NaN NaN \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " cocoa_100g chlorophyl_100g carbon-footprint_100g nutrition-score-fr_100g \\\n", - "0 NaN NaN NaN NaN \n", - "1 NaN NaN NaN 14.0 \n", - "2 NaN NaN NaN 0.0 \n", - "3 NaN NaN NaN 12.0 \n", - "4 NaN NaN NaN NaN \n", - "\n", - " nutrition-score-uk_100g glycemic-index_100g water-hardness_100g \n", - "0 NaN NaN NaN \n", - "1 14.0 NaN NaN \n", - "2 0.0 NaN NaN \n", - "3 12.0 NaN NaN \n", - "4 NaN NaN NaN \n", - "\n", - "[5 rows x 163 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 5. What is the number of observations in the dataset?" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(356027, 163)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "356027" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 6. What is the number of columns in the dataset?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(356027, 163)\n", - "163\n", - "\n", - "RangeIndex: 356027 entries, 0 to 356026\n", - "Columns: 163 entries, code to water-hardness_100g\n", - "dtypes: float64(107), object(56)\n", - "memory usage: 442.8+ MB\n" - ] - } - ], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 7. Print the name of all the columns." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['code', 'url', 'creator', 'created_t', 'created_datetime',\n", - " 'last_modified_t', 'last_modified_datetime', 'product_name',\n", - " 'generic_name', 'quantity',\n", - " ...\n", - " 'fruits-vegetables-nuts_100g', 'fruits-vegetables-nuts-estimate_100g',\n", - " 'collagen-meat-protein-ratio_100g', 'cocoa_100g', 'chlorophyl_100g',\n", - " 'carbon-footprint_100g', 'nutrition-score-fr_100g',\n", - " 'nutrition-score-uk_100g', 'glycemic-index_100g',\n", - " 'water-hardness_100g'],\n", - " dtype='object', length=163)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 8. What is the name of 105th column?" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'-glucose_100g'" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 9. What is the type of the observations of the 105th column?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "dtype('float64')" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 10. How is the dataset indexed?" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "RangeIndex(start=0, stop=356027, step=1)" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 11. What is the product name of the 19th observation?" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'Lotus Organic Brown Jasmine Rice'" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python [default]", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.4" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/02_Filtering_&_Sorting/.DS_Store b/02_Filtering_&_Sorting/.DS_Store index b086e3fb3..2b5a4e685 100644 Binary files a/02_Filtering_&_Sorting/.DS_Store and b/02_Filtering_&_Sorting/.DS_Store differ diff --git a/02_Filtering_&_Sorting/Chipotle/Exercises.ipynb b/02_Filtering_&_Sorting/Chipotle/Exercises.ipynb index dfb68245d..5dbc0df4a 100644 --- 
a/02_Filtering_&_Sorting/Chipotle/Exercises.ipynb +++ b/02_Filtering_&_Sorting/Chipotle/Exercises.ipynb @@ -12,37 +12,25 @@ "metadata": {}, "source": [ "This time we are going to pull data directly from the internet.\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called chipo." + "### Assign it to a variable called chipo." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -53,12 +41,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. How many products cost more than $10.00?" + "### How many products cost more than $10.00?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -69,13 +56,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. What is the price of each item? \n", + "### What is the price of each item? \n", "###### print a data frame with only two columns item_name and item_price" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -86,12 +72,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. 
Sort by the name of the item" + "### Sort by the name of the item" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -102,12 +87,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. What was the quantity of the most expensive item ordered?" + "### What was the quantity of the most expensive item ordered?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -118,12 +102,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. How many times was a Veggie Salad Bowl ordered?" + "### How many times was a Veggie Salad Bowl ordered?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -134,12 +117,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. How many times did someone order more than one Canned Soda?" + "### How many times did someone order more than one Canned Soda?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -149,21 +131,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/02_Filtering_&_Sorting/Chipotle/Exercises_with_solutions.ipynb b/02_Filtering_&_Sorting/Chipotle/Exercises_with_solutions.ipynb index 773dd80df..3d637d7b0 100644 --- a/02_Filtering_&_Sorting/Chipotle/Exercises_with_solutions.ipynb +++ b/02_Filtering_&_Sorting/Chipotle/Exercises_with_solutions.ipynb @@ -14,32 +14,21 @@ "metadata": {}, "source": [ "This time we are going to pull data directly from the 
internet.\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called chipo." + "### Assign it to a variable called chipo." ] }, { @@ -57,7 +46,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. How many products cost more than $10.00?" + "### How many products cost more than $10.00?" ] }, { @@ -750,7 +739,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. What is the price of each item? \n", + "### What is the price of each item? \n", "###### print a data frame with only two columns item_name and item_price" ] }, @@ -1422,7 +1411,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Sort by the name of the item" + "### Sort by the name of the item" ] }, { @@ -2086,7 +2075,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. What was the quantity of the most expensive item ordered?" + "### What was the quantity of the most expensive item ordered?" ] }, { @@ -2143,7 +2132,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. How many times was a Veggie Salad Bowl ordered?" + "### How many times was a Veggie Salad Bowl ordered?" ] }, { @@ -2172,7 +2161,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. 
How many times did someone order more than one Canned Soda?" + "### How many times did someone order more than one Canned Soda?" ] }, { @@ -2198,9 +2187,8 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -2214,20 +2202,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, diff --git a/02_Filtering_&_Sorting/Chipotle/Solutions.ipynb b/02_Filtering_&_Sorting/Chipotle/Solutions.ipynb index d21f74f3e..03c75cdd9 100644 --- a/02_Filtering_&_Sorting/Chipotle/Solutions.ipynb +++ b/02_Filtering_&_Sorting/Chipotle/Solutions.ipynb @@ -12,37 +12,25 @@ "metadata": {}, "source": [ "This time we are going to pull data directly from the internet.\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called chipo." 
+ "### Assign it to a variable called chipo." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -53,7 +41,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. How many products cost more than $10.00?" + "### How many products cost more than $10.00?" ] }, { @@ -93,7 +81,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. What is the price of each item? \n", + "### What is the price of each item? \n", "###### print a data frame with only two columns item_name and item_price" ] }, @@ -267,7 +255,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Sort by the name of the item" + "### Sort by the name of the item" ] }, { @@ -932,7 +920,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. What was the quantity of the most expensive item ordered?" + "### What was the quantity of the most expensive item ordered?" ] }, { @@ -989,7 +977,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. How many times was a Veggie Salad Bowl ordered?" + "### How many times was a Veggie Salad Bowl ordered?" ] }, { @@ -1016,7 +1004,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. How many times did someone order more than one Canned Soda?" + "### How many times did someone order more than one Canned Soda?" 
] }, { @@ -1041,23 +1029,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/02_Filtering_&_Sorting/Euro12/Exercises.ipynb b/02_Filtering_&_Sorting/Euro12/Exercises.ipynb index 3d95ed471..627418866 100644 --- a/02_Filtering_&_Sorting/Euro12/Exercises.ipynb +++ b/02_Filtering_&_Sorting/Euro12/Exercises.ipynb @@ -11,37 +11,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This time we are going to pull data directly from the internet.\n", - "\n", - "### Step 1. Import the necessary libraries" + "This time we are going to pull data directly from the internet." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/02_Filtering_%26_Sorting/Euro12/Euro_2012_stats_TEAM.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/02_Filtering_%26_Sorting/Euro12/Euro_2012_stats_TEAM.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called euro12." + "### Assign it to a variable called euro12." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -52,12 +40,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Select only the Goal column." 
+ "### Select only the Goal column." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -68,12 +55,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. How many team participated in the Euro2012?" + "### How many team participated in the Euro2012?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -84,12 +70,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. What is the number of columns in the dataset?" + "### What is the number of columns in the dataset?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -100,12 +85,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. View only the columns Team, Yellow Cards and Red Cards and assign them to a dataframe called discipline" + "### View only the columns Team, Yellow Cards and Red Cards and assign them to a dataframe called discipline" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -116,12 +100,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Sort the teams by Red Cards, then to Yellow Cards" + "### Sort the teams by Red Cards, then to Yellow Cards" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false, "scrolled": true @@ -133,12 +116,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Calculate the mean Yellow Cards given per Team" + "### Calculate the mean Yellow Cards given per Team" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -149,12 +131,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. 
Filter teams that scored more than 6 goals" + "### Filter teams that scored more than 6 goals" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -165,12 +146,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Select the teams that start with G" + "### Select the teams that start with G" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -181,12 +161,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. Select the first 7 columns" + "### Select the first 7 columns" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -197,12 +176,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Select all columns except the last 3." + "### Select all columns except the last 3." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -213,12 +191,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. 
Present only the Shooting Accuracy from England, Italy and Russia" + "### Present only the Shooting Accuracy from England, Italy and Russia" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -227,23 +204,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/02_Filtering_&_Sorting/Euro12/Exercises_with_Solutions.ipynb b/02_Filtering_&_Sorting/Euro12/Exercises_with_Solutions.ipynb index 29a9be096..cf9c051b8 100644 --- a/02_Filtering_&_Sorting/Euro12/Exercises_with_Solutions.ipynb +++ b/02_Filtering_&_Sorting/Euro12/Exercises_with_Solutions.ipynb @@ -12,32 +12,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This time we are going to pull data directly from the internet.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" + "This time we are going to pull data directly from the internet." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/02_Filtering_%26_Sorting/Euro12/Euro_2012_stats_TEAM.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/02_Filtering_%26_Sorting/Euro12/Euro_2012_stats_TEAM.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. 
Assign it to a variable called euro12." + "### Assign it to a variable called euro12." ] }, { @@ -574,7 +563,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Select only the Goal column." + "### Select only the Goal column." ] }, { @@ -617,7 +606,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. How many team participated in the Euro2012?" + "### How many team participated in the Euro2012?" ] }, { @@ -644,7 +633,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. What is the number of columns in the dataset?" + "### What is the number of columns in the dataset?" ] }, { @@ -707,7 +696,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. View only the columns Team, Yellow Cards and Red Cards and assign them to a dataframe called discipline" + "### View only the columns Team, Yellow Cards and Red Cards and assign them to a dataframe called discipline" ] }, { @@ -865,7 +854,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Sort the teams by Red Cards, then to Yellow Cards" + "### Sort the teams by Red Cards, then to Yellow Cards" ] }, { @@ -1022,7 +1011,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Calculate the mean Yellow Cards given per Team" + "### Calculate the mean Yellow Cards given per Team" ] }, { @@ -1049,7 +1038,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Filter teams that scored more than 6 goals" + "### Filter teams that scored more than 6 goals" ] }, { @@ -1179,7 +1168,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Select the teams that start with G" + "### Select the teams that start with G" ] }, { @@ -1309,7 +1298,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. Select the first 7 columns" + "### Select the first 7 columns" ] }, { @@ -1553,7 +1542,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. 
Select all columns except the last 3." + "### Select all columns except the last 3." ] }, { @@ -2091,7 +2080,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. Present only the Shooting Accuracy from England, Italy and Russia" + "### Present only the Shooting Accuracy from England, Italy and Russia" ] }, { @@ -2151,9 +2140,8 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -2167,20 +2155,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, diff --git a/02_Filtering_&_Sorting/Euro12/Solutions.ipynb b/02_Filtering_&_Sorting/Euro12/Solutions.ipynb index ae528ea09..4150614af 100644 --- a/02_Filtering_&_Sorting/Euro12/Solutions.ipynb +++ b/02_Filtering_&_Sorting/Euro12/Solutions.ipynb @@ -11,32 +11,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This time we are going to pull data directly from the internet.\n", - "\n", - "### Step 1. Import the necessary libraries" + "This time we are going to pull data directly from the internet." ] }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/02_Filtering_%26_Sorting/Euro12/Euro_2012_stats_TEAM.csv). 
" + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/02_Filtering_%26_Sorting/Euro12/Euro_2012_stats_TEAM.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called euro12." + "### Assign it to a variable called euro12." ] }, { @@ -572,7 +561,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Select only the Goal column." + "### Select only the Goal column." ] }, { @@ -615,7 +604,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. How many team participated in the Euro2012?" + "### How many team participated in the Euro2012?" ] }, { @@ -642,7 +631,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. What is the number of columns in the dataset?" + "### What is the number of columns in the dataset?" ] }, { @@ -705,7 +694,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. View only the columns Team, Yellow Cards and Red Cards and assign them to a dataframe called discipline" + "### View only the columns Team, Yellow Cards and Red Cards and assign them to a dataframe called discipline" ] }, { @@ -860,7 +849,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Sort the teams by Red Cards, then to Yellow Cards" + "### Sort the teams by Red Cards, then to Yellow Cards" ] }, { @@ -1016,7 +1005,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Calculate the mean Yellow Cards given per Team" + "### Calculate the mean Yellow Cards given per Team" ] }, { @@ -1043,7 +1032,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Filter teams that scored more than 6 goals" + "### Filter teams that scored more than 6 goals" ] }, { @@ -1173,7 +1162,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. 
Select the teams that start with G" + "### Select the teams that start with G" ] }, { @@ -1303,7 +1292,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. Select the first 7 columns" + "### Select the first 7 columns" ] }, { @@ -1544,7 +1533,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Select all columns except the last 3." + "### Select all columns except the last 3." ] }, { @@ -2080,7 +2069,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. Present only the Shooting Accuracy from England, Italy and Russia" + "### Present only the Shooting Accuracy from England, Italy and Russia" ] }, { @@ -2138,23 +2127,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/02_Filtering_&_Sorting/Fictional Army/Exercise.ipynb b/02_Filtering_&_Sorting/Fictional Army/Exercise.ipynb index f2bd279e3..a015833ec 100644 --- a/02_Filtering_&_Sorting/Fictional Army/Exercise.ipynb +++ b/02_Filtering_&_Sorting/Fictional Army/Exercise.ipynb @@ -15,25 +15,14 @@ "\n", "This exercise was inspired by this [page](http://chrisalbon.com/python/)\n", "\n", - "Special thanks to: https://github.com/chrisalbon for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" + "Special thanks to: https://github.com/chrisalbon for sharing the dataset and materials." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. This is the data given as a dictionary" + "### This is the data given as a dictionary" ] }, { @@ -61,14 +50,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Create a dataframe and assign it to a variable called army. \n", + "### Create a dataframe and assign it to a variable called army. \n", "\n", "#### Don't forget to include the columns names in the order presented in the dictionary ('regiment', 'company', 'deaths'...) so that the column index order is consistent with the solutions. If omitted, pandas will order the columns alphabetically." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -77,12 +65,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Set the 'origin' colum as the index of the dataframe" + "### Set the 'origin' colum as the index of the dataframe" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -91,12 +78,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Print only the column veterans" + "### Print only the column veterans" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -105,12 +91,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Print the columns 'veterans' and 'deaths'" + "### Print the columns 'veterans' and 'deaths'" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -119,12 +104,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Print the name of all the columns." + "### Print the name of all the columns." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -133,12 +117,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. 
Select the 'deaths', 'size' and 'deserters' columns from Maine and Alaska" + "### Select the 'deaths', 'size' and 'deserters' columns from Maine and Alaska" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -147,12 +130,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Select the rows 3 to 7 and the columns 3 to 6" + "### Select the rows 3 to 7 and the columns 3 to 6" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -161,12 +143,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Select every row after the fourth row and all columns" + "### Select every row after the fourth row and all columns" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -175,12 +156,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Select every row up to the 4th row and all columns" + "### Select every row up to the 4th row and all columns" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -189,12 +169,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. Select the 3rd column up to the 7th column" + "### Select the 3rd column up to the 7th column" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -203,12 +182,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Select rows where df.deaths is greater than 50" + "### Select rows where df.deaths is greater than 50" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -217,12 +195,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. 
Select rows where df.deaths is greater than 500 or less than 50" + "### Select rows where df.deaths is greater than 500 or less than 50" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -231,12 +208,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 15. Select all the regiments not named \"Dragoons\"" + "### Select all the regiments not named \"Dragoons\"" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -245,12 +221,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 16. Select the rows called Texas and Arizona" + "### Select the rows called Texas and Arizona" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -259,12 +234,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 17. Select the third cell in the row named Arizona" + "### Select the third cell in the row named Arizona" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -273,12 +247,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 18. 
Select the third cell down in the column named deaths" + "### Select the third cell down in the column named deaths" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -286,7 +259,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -300,7 +273,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/02_Filtering_&_Sorting/Fictional Army/Exercise_with_solutions.ipynb b/02_Filtering_&_Sorting/Fictional Army/Exercise_with_solutions.ipynb index a5ebfde1e..b570d8799 100644 --- a/02_Filtering_&_Sorting/Fictional Army/Exercise_with_solutions.ipynb +++ b/02_Filtering_&_Sorting/Fictional Army/Exercise_with_solutions.ipynb @@ -16,25 +16,14 @@ "\n", "This exercise was inspired by this [page](http://chrisalbon.com/python/)\n", "\n", - "Special thanks to: https://github.com/chrisalbon for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" + "Special thanks to: https://github.com/chrisalbon for sharing the dataset and materials." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. This is the data given as a dictionary" + "### This is the data given as a dictionary" ] }, { @@ -60,7 +49,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Create a dataframe and assign it to a variable called army. \n", + "### Create a dataframe and assign it to a variable called army. \n", "\n", "#### Don't forget to include the columns names in the order presented in the dictionary ('regiment', 'company', 'deaths'...) so that the column index order is consistent with the solutions. 
If omitted, pandas will order the columns alphabetically." ] @@ -308,7 +297,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Set the 'origin' colum as the index of the dataframe" + "### Set the 'origin' colum as the index of the dataframe" ] }, { @@ -324,7 +313,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Print only the column veterans" + "### Print only the column veterans" ] }, { @@ -364,7 +353,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Print the columns 'veterans' and 'deaths'" + "### Print the columns 'veterans' and 'deaths'" ] }, { @@ -497,7 +486,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Print the name of all the columns." + "### Print the name of all the columns." ] }, { @@ -526,7 +515,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Select the 'deaths', 'size' and 'deserters' columns from Maine and Alaska" + "### Select the 'deaths', 'size' and 'deserters' columns from Maine and Alaska" ] }, { @@ -603,7 +592,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Select the rows 3 to 7 and the columns 3 to 6" + "### Select the rows 3 to 7 and the columns 3 to 6" ] }, { @@ -708,7 +697,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Select every row after the fourth row and all columns" + "### Select every row after the fourth row and all columns" ] }, { @@ -898,7 +887,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Select every row up to the 4th row and all columns" + "### Select every row up to the 4th row and all columns" ] }, { @@ -1032,7 +1021,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. Select the 3rd column up to the 7th column" + "### Select the 3rd column up to the 7th column" ] }, { @@ -1207,7 +1196,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. 
Select rows where df.deaths is greater than 50" + "### Select rows where df.deaths is greater than 50" ] }, { @@ -1397,7 +1386,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. Select rows where df.deaths is greater than 500 or less than 50" + "### Select rows where df.deaths is greater than 500 or less than 50" ] }, { @@ -1573,7 +1562,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 15. Select all the regiments not named \"Dragoons\"" + "### Select all the regiments not named \"Dragoons\"" ] }, { @@ -1763,7 +1752,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 16. Select the rows called Texas and Arizona" + "### Select the rows called Texas and Arizona" ] }, { @@ -1869,7 +1858,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 17. Select the third cell in the row named Arizona" + "### Select the third cell in the row named Arizona" ] }, { @@ -1898,7 +1887,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 18. 
Select the third cell down in the column named deaths" + "### Select the third cell down in the column named deaths" ] }, { @@ -1925,7 +1914,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1939,20 +1928,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, diff --git a/02_Filtering_&_Sorting/Fictional Army/Solutions.ipynb b/02_Filtering_&_Sorting/Fictional Army/Solutions.ipynb index 9d98ce4c0..535c9b0d0 100644 --- a/02_Filtering_&_Sorting/Fictional Army/Solutions.ipynb +++ b/02_Filtering_&_Sorting/Fictional Army/Solutions.ipynb @@ -15,25 +15,14 @@ "\n", "This exercise was inspired by this [page](http://chrisalbon.com/python/)\n", "\n", - "Special thanks to: https://github.com/chrisalbon for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" + "Special thanks to: https://github.com/chrisalbon for sharing the dataset and materials." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. This is the data given as a dictionary" + "### This is the data given as a dictionary" ] }, { @@ -59,1872 +48,216 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Create a dataframe and assign it to a variable called army. \n", + "### Create a dataframe and assign it to a variable called army. 
\n", "\n", "#### Don't forget to include the columns names in the order presented in the dictionary ('regiment', 'company', 'deaths'...) so that the column index order is consistent with the solutions. If omitted, pandas will order the columns alphabetically." ] }, { "cell_type": "code", - "execution_count": 3, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
regimentcompanydeathsbattlessizeveteransreadinessarmoreddesertersorigin
0Nighthawks1st523510451114Arizona
1Nighthawks1st524295752024California
2Nighthawks2nd2521099623131Texas
3Nighthawks2nd6162140026312Florida
4Dragoons1st434159273203Maine
5Dragoons1st2347100637114Iowa
6Dragoons2nd52389879492024Alaska
7Dragoons2nd623849483131Washington
8Scouts1st62497348202Oregon
9Scouts1st7371005435103Wyoming
10Scouts2nd378109963212Louisana
11Scouts2nd3591523345313Georgia
\n", - "
" - ], - "text/plain": [ - " regiment company deaths battles size veterans readiness armored \\\n", - "0 Nighthawks 1st 523 5 1045 1 1 1 \n", - "1 Nighthawks 1st 52 42 957 5 2 0 \n", - "2 Nighthawks 2nd 25 2 1099 62 3 1 \n", - "3 Nighthawks 2nd 616 2 1400 26 3 1 \n", - "4 Dragoons 1st 43 4 1592 73 2 0 \n", - "5 Dragoons 1st 234 7 1006 37 1 1 \n", - "6 Dragoons 2nd 523 8 987 949 2 0 \n", - "7 Dragoons 2nd 62 3 849 48 3 1 \n", - "8 Scouts 1st 62 4 973 48 2 0 \n", - "9 Scouts 1st 73 7 1005 435 1 0 \n", - "10 Scouts 2nd 37 8 1099 63 2 1 \n", - "11 Scouts 2nd 35 9 1523 345 3 1 \n", - "\n", - " deserters origin \n", - "0 4 Arizona \n", - "1 24 California \n", - "2 31 Texas \n", - "3 2 Florida \n", - "4 3 Maine \n", - "5 4 Iowa \n", - "6 24 Alaska \n", - "7 31 Washington \n", - "8 2 Oregon \n", - "9 3 Wyoming \n", - "10 2 Louisana \n", - "11 3 Georgia " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "army = pd.DataFrame(data=raw_data)\n", - "army" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Set the 'origin' colum as the index of the dataframe" + "### Set the 'origin' colum as the index of the dataframe" ] }, { "cell_type": "code", - "execution_count": 4, "metadata": {}, "outputs": [], - "source": [ - "army.set_index('origin', inplace=True)" - ] + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. 
Print only the column veterans" + "### Print only the column veterans" ] }, { "cell_type": "code", - "execution_count": 5, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "origin\n", - "Arizona 1\n", - "California 5\n", - "Texas 62\n", - "Florida 26\n", - "Maine 73\n", - "Iowa 37\n", - "Alaska 949\n", - "Washington 48\n", - "Oregon 48\n", - "Wyoming 435\n", - "Louisana 63\n", - "Georgia 345\n", - "Name: veterans, dtype: int64" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "army.veterans" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Print the columns 'veterans' and 'deaths'" + "### Print the columns 'veterans' and 'deaths'" ] }, { "cell_type": "code", - "execution_count": 6, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
veteransdeaths
origin
Arizona1523
California552
Texas6225
Florida26616
Maine7343
Iowa37234
Alaska949523
Washington4862
Oregon4862
Wyoming43573
Louisana6337
Georgia34535
\n", - "
" - ], - "text/plain": [ - " veterans deaths\n", - "origin \n", - "Arizona 1 523\n", - "California 5 52\n", - "Texas 62 25\n", - "Florida 26 616\n", - "Maine 73 43\n", - "Iowa 37 234\n", - "Alaska 949 523\n", - "Washington 48 62\n", - "Oregon 48 62\n", - "Wyoming 435 73\n", - "Louisana 63 37\n", - "Georgia 345 35" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "army[[\"veterans\", \"deaths\"]]" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Print the name of all the columns." + "### Print the name of all the columns." ] }, { "cell_type": "code", - "execution_count": 7, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['regiment', 'company', 'deaths', 'battles', 'size', 'veterans',\n", - " 'readiness', 'armored', 'deserters'],\n", - " dtype='object')" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "army.columns" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Select the 'deaths', 'size' and 'deserters' columns from Maine and Alaska" + "### Select the 'deaths', 'size' and 'deserters' columns from Maine and Alaska" ] }, { "cell_type": "code", - "execution_count": 8, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
deathssizedeserters
origin
Maine4315923
Alaska52398724
\n", - "
" - ], - "text/plain": [ - " deaths size deserters\n", - "origin \n", - "Maine 43 1592 3\n", - "Alaska 523 987 24" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "army.loc[[\"Maine\", \"Alaska\"], [\"deaths\", \"size\", \"deserters\"]]" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Select the rows 3 to 7 and the columns 3 to 6" + "### Select the rows 3 to 7 and the columns 3 to 6" ] }, { "cell_type": "code", - "execution_count": 9, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
deathsbattlessizeveterans
origin
Texas252109962
Florida6162140026
Maine434159273
Iowa2347100637
Alaska5238987949
\n", - "
" - ], - "text/plain": [ - " deaths battles size veterans\n", - "origin \n", - "Texas 25 2 1099 62\n", - "Florida 616 2 1400 26\n", - "Maine 43 4 1592 73\n", - "Iowa 234 7 1006 37\n", - "Alaska 523 8 987 949" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "army.iloc[2:7, 2:6]" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Select every row after the fourth row and all columns" + "### Select every row after the fourth row and all columns" ] }, { "cell_type": "code", - "execution_count": 10, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
regimentcompanydeathsbattlessizeveteransreadinessarmoreddeserters
origin
MaineDragoons1st434159273203
IowaDragoons1st2347100637114
AlaskaDragoons2nd52389879492024
WashingtonDragoons2nd623849483131
OregonScouts1st62497348202
WyomingScouts1st7371005435103
LouisanaScouts2nd378109963212
GeorgiaScouts2nd3591523345313
\n", - "
" - ], - "text/plain": [ - " regiment company deaths battles size veterans readiness \\\n", - "origin \n", - "Maine Dragoons 1st 43 4 1592 73 2 \n", - "Iowa Dragoons 1st 234 7 1006 37 1 \n", - "Alaska Dragoons 2nd 523 8 987 949 2 \n", - "Washington Dragoons 2nd 62 3 849 48 3 \n", - "Oregon Scouts 1st 62 4 973 48 2 \n", - "Wyoming Scouts 1st 73 7 1005 435 1 \n", - "Louisana Scouts 2nd 37 8 1099 63 2 \n", - "Georgia Scouts 2nd 35 9 1523 345 3 \n", - "\n", - " armored deserters \n", - "origin \n", - "Maine 0 3 \n", - "Iowa 1 4 \n", - "Alaska 0 24 \n", - "Washington 1 31 \n", - "Oregon 0 2 \n", - "Wyoming 0 3 \n", - "Louisana 1 2 \n", - "Georgia 1 3 " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "army.iloc[4:, :]" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Select every row up to the 4th row and all columns" + "### Select every row up to the 4th row and all columns" ] }, { "cell_type": "code", - "execution_count": 11, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
regimentcompanydeathsbattlessizeveteransreadinessarmoreddeserters
origin
ArizonaNighthawks1st523510451114
CaliforniaNighthawks1st524295752024
TexasNighthawks2nd2521099623131
FloridaNighthawks2nd6162140026312
\n", - "
" - ], - "text/plain": [ - " regiment company deaths battles size veterans readiness \\\n", - "origin \n", - "Arizona Nighthawks 1st 523 5 1045 1 1 \n", - "California Nighthawks 1st 52 42 957 5 2 \n", - "Texas Nighthawks 2nd 25 2 1099 62 3 \n", - "Florida Nighthawks 2nd 616 2 1400 26 3 \n", - "\n", - " armored deserters \n", - "origin \n", - "Arizona 1 4 \n", - "California 0 24 \n", - "Texas 1 31 \n", - "Florida 1 2 " - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "army.iloc[:4, :]" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. Select the 3rd column up to the 7th column" + "### Select the 3rd column up to the 7th column" ] }, { "cell_type": "code", - "execution_count": 12, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
deathsbattlessizeveteransreadiness
origin
Arizona5235104511
California524295752
Texas2521099623
Florida61621400263
Maine4341592732
Iowa23471006371
Alaska52389879492
Washington623849483
Oregon624973482
Wyoming73710054351
Louisana3781099632
Georgia35915233453
\n", - "
" - ], - "text/plain": [ - " deaths battles size veterans readiness\n", - "origin \n", - "Arizona 523 5 1045 1 1\n", - "California 52 42 957 5 2\n", - "Texas 25 2 1099 62 3\n", - "Florida 616 2 1400 26 3\n", - "Maine 43 4 1592 73 2\n", - "Iowa 234 7 1006 37 1\n", - "Alaska 523 8 987 949 2\n", - "Washington 62 3 849 48 3\n", - "Oregon 62 4 973 48 2\n", - "Wyoming 73 7 1005 435 1\n", - "Louisana 37 8 1099 63 2\n", - "Georgia 35 9 1523 345 3" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "army.iloc[:, 2:7]" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Select rows where df.deaths is greater than 50" + "### Select rows where df.deaths is greater than 50" ] }, { "cell_type": "code", - "execution_count": 13, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
regimentcompanydeathsbattlessizeveteransreadinessarmoreddeserters
origin
ArizonaNighthawks1st523510451114
CaliforniaNighthawks1st524295752024
FloridaNighthawks2nd6162140026312
IowaDragoons1st2347100637114
AlaskaDragoons2nd52389879492024
WashingtonDragoons2nd623849483131
OregonScouts1st62497348202
WyomingScouts1st7371005435103
\n", - "
" - ], - "text/plain": [ - " regiment company deaths battles size veterans readiness \\\n", - "origin \n", - "Arizona Nighthawks 1st 523 5 1045 1 1 \n", - "California Nighthawks 1st 52 42 957 5 2 \n", - "Florida Nighthawks 2nd 616 2 1400 26 3 \n", - "Iowa Dragoons 1st 234 7 1006 37 1 \n", - "Alaska Dragoons 2nd 523 8 987 949 2 \n", - "Washington Dragoons 2nd 62 3 849 48 3 \n", - "Oregon Scouts 1st 62 4 973 48 2 \n", - "Wyoming Scouts 1st 73 7 1005 435 1 \n", - "\n", - " armored deserters \n", - "origin \n", - "Arizona 1 4 \n", - "California 0 24 \n", - "Florida 1 2 \n", - "Iowa 1 4 \n", - "Alaska 0 24 \n", - "Washington 1 31 \n", - "Oregon 0 2 \n", - "Wyoming 0 3 " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "army[army[\"deaths\"] > 50]" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. Select rows where df.deaths is greater than 500 or less than 50" + "### Select rows where df.deaths is greater than 500 or less than 50" ] }, { "cell_type": "code", - "execution_count": 14, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
regimentcompanydeathsbattlessizeveteransreadinessarmoreddeserters
origin
ArizonaNighthawks1st523510451114
TexasNighthawks2nd2521099623131
FloridaNighthawks2nd6162140026312
MaineDragoons1st434159273203
AlaskaDragoons2nd52389879492024
LouisanaScouts2nd378109963212
GeorgiaScouts2nd3591523345313
\n", - "
" - ], - "text/plain": [ - " regiment company deaths battles size veterans readiness \\\n", - "origin \n", - "Arizona Nighthawks 1st 523 5 1045 1 1 \n", - "Texas Nighthawks 2nd 25 2 1099 62 3 \n", - "Florida Nighthawks 2nd 616 2 1400 26 3 \n", - "Maine Dragoons 1st 43 4 1592 73 2 \n", - "Alaska Dragoons 2nd 523 8 987 949 2 \n", - "Louisana Scouts 2nd 37 8 1099 63 2 \n", - "Georgia Scouts 2nd 35 9 1523 345 3 \n", - "\n", - " armored deserters \n", - "origin \n", - "Arizona 1 4 \n", - "Texas 1 31 \n", - "Florida 1 2 \n", - "Maine 0 3 \n", - "Alaska 0 24 \n", - "Louisana 1 2 \n", - "Georgia 1 3 " - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "army[(army[\"deaths\"] > 500) | (army[\"deaths\"] < 50)]" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 15. Select all the regiments not named \"Dragoons\"" + "### Select all the regiments not named \"Dragoons\"" ] }, { "cell_type": "code", - "execution_count": 15, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
regimentcompanydeathsbattlessizeveteransreadinessarmoreddeserters
origin
ArizonaNighthawks1st523510451114
CaliforniaNighthawks1st524295752024
TexasNighthawks2nd2521099623131
FloridaNighthawks2nd6162140026312
OregonScouts1st62497348202
WyomingScouts1st7371005435103
LouisanaScouts2nd378109963212
GeorgiaScouts2nd3591523345313
\n", - "
" - ], - "text/plain": [ - " regiment company deaths battles size veterans readiness \\\n", - "origin \n", - "Arizona Nighthawks 1st 523 5 1045 1 1 \n", - "California Nighthawks 1st 52 42 957 5 2 \n", - "Texas Nighthawks 2nd 25 2 1099 62 3 \n", - "Florida Nighthawks 2nd 616 2 1400 26 3 \n", - "Oregon Scouts 1st 62 4 973 48 2 \n", - "Wyoming Scouts 1st 73 7 1005 435 1 \n", - "Louisana Scouts 2nd 37 8 1099 63 2 \n", - "Georgia Scouts 2nd 35 9 1523 345 3 \n", - "\n", - " armored deserters \n", - "origin \n", - "Arizona 1 4 \n", - "California 0 24 \n", - "Texas 1 31 \n", - "Florida 1 2 \n", - "Oregon 0 2 \n", - "Wyoming 0 3 \n", - "Louisana 1 2 \n", - "Georgia 1 3 " - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "army[army[\"regiment\"] != \"Dragoons\"]" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 16. Select the rows called Texas and Arizona" + "### Select the rows called Texas and Arizona" ] }, { "cell_type": "code", - "execution_count": 16, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
regimentcompanydeathsbattlessizeveteransreadinessarmoreddeserters
origin
TexasNighthawks2nd2521099623131
ArizonaNighthawks1st523510451114
\n", - "
" - ], - "text/plain": [ - " regiment company deaths battles size veterans readiness \\\n", - "origin \n", - "Texas Nighthawks 2nd 25 2 1099 62 3 \n", - "Arizona Nighthawks 1st 523 5 1045 1 1 \n", - "\n", - " armored deserters \n", - "origin \n", - "Texas 1 31 \n", - "Arizona 1 4 " - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "army.loc[[\"Texas\", \"Arizona\"], :]" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 17. Select the third cell in the row named Arizona" + "### Select the third cell in the row named Arizona" ] }, { "cell_type": "code", - "execution_count": 17, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "origin\n", - "Arizona 523\n", - "Name: deaths, dtype: int64" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "army.loc[[\"Arizona\"]].iloc[:, 2]" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 18. 
Select the third cell down in the column named deaths" + "### Select the third cell down in the column named deaths" ] }, { "cell_type": "code", - "execution_count": 18, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "deaths 25\n", - "Name: Texas, dtype: int64" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "army.loc[:, [\"deaths\"]].iloc[2]" - ] + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1938,9 +271,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.9.7" } }, "nbformat": 4, - "nbformat_minor": 1 + "nbformat_minor": 4 } diff --git a/03_Grouping/.DS_Store b/03_Grouping/.DS_Store new file mode 100644 index 000000000..9e87190b0 Binary files /dev/null and b/03_Grouping/.DS_Store differ diff --git a/03_Grouping/Alcohol_Consumption/Exercise.ipynb b/03_Grouping/Alcohol_Consumption/Exercise.ipynb index d49a518b5..f0910ba16 100644 --- a/03_Grouping/Alcohol_Consumption/Exercise.ipynb +++ b/03_Grouping/Alcohol_Consumption/Exercise.ipynb @@ -17,34 +17,25 @@ "\n", "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", "\n", - "Check out this [Diagram](http://i.imgur.com/yjNkiwL.png) \n", - "### Step 1. Import the necessary libraries" + "Check out this [Diagram](http://i.imgur.com/yjNkiwL.png)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv). 
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called drinks." + "### Assign it to a variable called drinks." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -53,12 +44,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Which continent drinks more beer on average?" + "### Which continent drinks more beer on average?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -67,12 +57,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. For each continent print the statistics for wine consumption." + "### For each continent print the statistics for wine consumption." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -81,12 +70,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Print the mean alcohol consumption per continent for every column" + "### Print the mean alcohol consumption per continent for every column" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -95,12 +83,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Print the median alcohol consumption per continent for every column" + "### Print the median alcohol consumption per continent for every column" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -109,13 +96,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. 
Print the mean, min and max values for spirit consumption.\n", + "### Print the mean, min and max values for spirit consumption.\n", "#### This time output a DataFrame" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -123,21 +109,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.16" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/03_Grouping/Alcohol_Consumption/Exercise_with_solutions.ipynb b/03_Grouping/Alcohol_Consumption/Exercise_with_solutions.ipynb index 841a5af59..9329b461d 100644 --- a/03_Grouping/Alcohol_Consumption/Exercise_with_solutions.ipynb +++ b/03_Grouping/Alcohol_Consumption/Exercise_with_solutions.ipynb @@ -19,31 +19,21 @@ "\n", "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", "\n", - "Check out this [Diagram](http://i.imgur.com/yjNkiwL.png) \n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" + "Check out this [Diagram](http://i.imgur.com/yjNkiwL.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called drinks." 
+ "### Assign it to a variable called drinks." ] }, { @@ -147,7 +137,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Which continent drinks more beer on average?" + "### Which continent drinks more beer on average?" ] }, { @@ -180,7 +170,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. For each continent print the statistics for wine consumption." + "### For each continent print the statistics for wine consumption." ] }, { @@ -248,7 +238,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Print the mean alcohol consumption per continent for every column" + "### Print the mean alcohol consumption per continent for every column" ] }, { @@ -348,7 +338,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Print the median alcohol consumption per continent for every column" + "### Print the median alcohol consumption per continent for every column" ] }, { @@ -448,7 +438,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. 
Print the mean, min and max values for spirit consumption.\n", + "### Print the mean, min and max values for spirit consumption.\n", "#### This time output a DataFrame" ] }, @@ -533,7 +523,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -547,20 +537,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, diff --git a/03_Grouping/Alcohol_Consumption/Solutions.ipynb b/03_Grouping/Alcohol_Consumption/Solutions.ipynb index 3783e0b23..d3d5b2057 100644 --- a/03_Grouping/Alcohol_Consumption/Solutions.ipynb +++ b/03_Grouping/Alcohol_Consumption/Solutions.ipynb @@ -17,29 +17,21 @@ "\n", "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", "\n", - "Check out this [Diagram](http://i.imgur.com/yjNkiwL.png) \n", - "### Step 1. Import the necessary libraries" + "Check out this [Diagram](http://i.imgur.com/yjNkiwL.png)" ] }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called drinks." + "### Assign it to a variable called drinks." 
] }, { @@ -140,7 +132,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Which continent drinks more beer on average?" + "### Which continent drinks more beer on average?" ] }, { @@ -171,7 +163,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. For each continent print the statistics for wine consumption." + "### For each continent print the statistics for wine consumption." ] }, { @@ -237,7 +229,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Print the mean alcohol consumption per continent for every column" + "### Print the mean alcohol consumption per continent for every column" ] }, { @@ -335,7 +327,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Print the median alcohol consumption per continent for every column" + "### Print the median alcohol consumption per continent for every column" ] }, { @@ -433,7 +425,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Print the mean, min and max values for spirit consumption.\n", + "### Print the mean, min and max values for spirit consumption.\n", "#### This time output a DataFrame" ] }, @@ -516,21 +508,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.16" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/03_Grouping/Occupation/Exercise.ipynb b/03_Grouping/Occupation/Exercise.ipynb index 16fbcdd40..8863d7188 100644 --- a/03_Grouping/Occupation/Exercise.ipynb +++ b/03_Grouping/Occupation/Exercise.ipynb @@ -13,37 +13,25 @@ "source": [ "### Introduction:\n", "\n", - "Special thanks to: 
https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called users." + "### Assign it to a variable called users." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -54,12 +42,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Discover what is the mean age per occupation" + "### Discover what is the mean age per occupation" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -70,12 +57,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Discover the Male ratio per occupation and sort it from the most to the least" + "### Discover the Male ratio per occupation and sort it from the most to the least" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -86,12 +72,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. For each occupation, calculate the minimum and maximum ages" + "### For each occupation, calculate the minimum and maximum ages" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -102,12 +87,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. 
For each combination of occupation and gender, calculate the mean age" + "### For each combination of occupation and gender, calculate the mean age" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -118,12 +102,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. For each occupation present the percentage of women and men" + "### For each occupation present the percentage of women and men" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -133,21 +116,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/03_Grouping/Occupation/Exercises_with_solutions.ipynb b/03_Grouping/Occupation/Exercises_with_solutions.ipynb index 1f8e419c5..b39b004f4 100644 --- a/03_Grouping/Occupation/Exercises_with_solutions.ipynb +++ b/03_Grouping/Occupation/Exercises_with_solutions.ipynb @@ -15,32 +15,21 @@ "source": [ "### Introduction:\n", "\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). 
" + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called users." + "### Assign it to a variable called users." ] }, { @@ -134,7 +123,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Discover what is the mean age per occupation" + "### Discover what is the mean age per occupation" ] }, { @@ -183,7 +172,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Discover the Male ratio per occupation and sort it from the most to the least" + "### Discover the Male ratio per occupation and sort it from the most to the least" ] }, { @@ -245,7 +234,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. For each occupation, calculate the minimum and maximum ages" + "### For each occupation, calculate the minimum and maximum ages" ] }, { @@ -419,7 +408,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. For each combination of occupation and gender, calculate the mean age" + "### For each combination of occupation and gender, calculate the mean age" ] }, { @@ -488,7 +477,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. 
For each occupation present the percentage of women and men" + "### For each occupation present the percentage of women and men" ] }, { @@ -566,7 +555,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -580,20 +569,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, diff --git a/03_Grouping/Occupation/Solutions.ipynb b/03_Grouping/Occupation/Solutions.ipynb index f07ceda01..2f4019d9a 100644 --- a/03_Grouping/Occupation/Solutions.ipynb +++ b/03_Grouping/Occupation/Solutions.ipynb @@ -13,32 +13,21 @@ "source": [ "### Introduction:\n", "\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called users." + "### Assign it to a variable called users." ] }, { @@ -130,7 +119,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. 
Discover what is the mean age per occupation" + "### Discover what is the mean age per occupation" ] }, { @@ -179,7 +168,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Discover the Male ratio per occupation and sort it from the most to the least" + "### Discover the Male ratio per occupation and sort it from the most to the least" ] }, { @@ -227,7 +216,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. For each occupation, calculate the minimum and maximum ages" + "### For each occupation, calculate the minimum and maximum ages" ] }, { @@ -401,7 +390,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. For each combination of occupation and gender, calculate the mean age" + "### For each combination of occupation and gender, calculate the mean age" ] }, { @@ -470,7 +459,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. For each occupation present the percentage of women and men" + "### For each occupation present the percentage of women and men" ] }, { @@ -538,21 +527,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/03_Grouping/Regiment/Exercises.ipynb b/03_Grouping/Regiment/Exercises.ipynb index abf1c126b..ab8b40483 100644 --- a/03_Grouping/Regiment/Exercises.ipynb +++ b/03_Grouping/Regiment/Exercises.ipynb @@ -13,25 +13,14 @@ "source": [ "### Introduction:\n", "\n", - "Special thanks to: http://chrisalbon.com/ for sharing the dataset and materials.\n", - "\n", - "### Step 1. 
Import the necessary libraries" + "Special thanks to: http://chrisalbon.com/ for sharing the dataset and materials." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create the DataFrame with the following values:" + "### Create the DataFrame with the following values:" ] }, { @@ -53,13 +42,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called regiment.\n", + "### Assign it to a variable called regiment.\n", "#### Don't forget to name each column" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -70,12 +58,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. What is the mean preTestScore from the regiment Nighthawks? " + "### What is the mean preTestScore from the regiment Nighthawks? " ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -86,12 +73,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Present general statistics by company" + "### Present general statistics by company" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -102,12 +88,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. What is the mean of each company's preTestScore?" + "### What is the mean of each company's preTestScore?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -118,12 +103,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. 
Present the mean preTestScores grouped by regiment and company" + "### Present the mean preTestScores grouped by regiment and company" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -134,12 +118,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Present the mean preTestScores grouped by regiment and company without heirarchical indexing" + "### Present the mean preTestScores grouped by regiment and company without heirarchical indexing" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -150,12 +133,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Group the entire dataframe by regiment and company" + "### Group the entire dataframe by regiment and company" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -166,12 +148,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. What is the number of observations in each regiment and company" + "### What is the number of observations in each regiment and company" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -182,12 +163,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. 
Iterate over a group and print the name and the whole data from the regiment" + "### Iterate over a group and print the name and the whole data from the regiment" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -197,21 +177,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/03_Grouping/Regiment/Exercises_solutions.ipynb b/03_Grouping/Regiment/Exercises_solutions.ipynb index 00e825065..83abec165 100644 --- a/03_Grouping/Regiment/Exercises_solutions.ipynb +++ b/03_Grouping/Regiment/Exercises_solutions.ipynb @@ -15,25 +15,14 @@ "source": [ "### Introduction:\n", "\n", - "Special thanks to: http://chrisalbon.com/ for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" + "Special thanks to: http://chrisalbon.com/ for sharing the dataset and materials." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create the DataFrame with the following values:" + "### Create the DataFrame with the following values:" ] }, { @@ -55,7 +44,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called regiment.\n", + "### Assign it to a variable called regiment.\n", "#### Don't forget to name each column" ] }, @@ -210,7 +199,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. 
What is the mean preTestScore from the regiment Nighthawks? " + "### What is the mean preTestScore from the regiment Nighthawks? " ] }, { @@ -276,7 +265,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Present general statistics by company" + "### Present general statistics by company" ] }, { @@ -424,7 +413,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. What is the mean of each company's preTestScore?" + "### What is the mean of each company's preTestScore?" ] }, { @@ -454,7 +443,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Present the mean preTestScores grouped by regiment and company" + "### Present the mean preTestScores grouped by regiment and company" ] }, { @@ -488,7 +477,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Present the mean preTestScores grouped by regiment and company without heirarchical indexing" + "### Present the mean preTestScores grouped by regiment and company without heirarchical indexing" ] }, { @@ -554,7 +543,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Group the entire dataframe by regiment and company" + "### Group the entire dataframe by regiment and company" ] }, { @@ -643,7 +632,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. What is the number of observations in each regiment and company" + "### What is the number of observations in each regiment and company" ] }, { @@ -677,7 +666,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. 
Iterate over a group and print the name and the whole data from the regiment" + "### Iterate over a group and print the name and the whole data from the regiment" ] }, { @@ -722,7 +711,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -736,20 +725,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, diff --git a/03_Grouping/Regiment/Solutions.ipynb b/03_Grouping/Regiment/Solutions.ipynb index 53fe58be1..af96c77f6 100644 --- a/03_Grouping/Regiment/Solutions.ipynb +++ b/03_Grouping/Regiment/Solutions.ipynb @@ -13,25 +13,14 @@ "source": [ "### Introduction:\n", "\n", - "Special thanks to: http://chrisalbon.com/ for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" + "Special thanks to: http://chrisalbon.com/ for sharing the dataset and materials." ] }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create the DataFrame with the following values:" + "### Create the DataFrame with the following values:" ] }, { @@ -53,7 +42,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called regiment.\n", + "### Assign it to a variable called regiment.\n", "#### Don't forget to name each column" ] }, @@ -207,7 +196,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. What is the mean preTestScore from the regiment Nighthawks? 
" + "### What is the mean preTestScore from the regiment Nighthawks? " ] }, { @@ -273,7 +262,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Present general statistics by company" + "### Present general statistics by company" ] }, { @@ -421,7 +410,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. What is the mean of each company's preTestScore?" + "### What is the mean of each company's preTestScore?" ] }, { @@ -451,7 +440,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Present the mean preTestScores grouped by regiment and company" + "### Present the mean preTestScores grouped by regiment and company" ] }, { @@ -485,7 +474,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Present the mean preTestScores grouped by regiment and company without heirarchical indexing" + "### Present the mean preTestScores grouped by regiment and company without heirarchical indexing" ] }, { @@ -551,7 +540,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Group the entire dataframe by regiment and company" + "### Group the entire dataframe by regiment and company" ] }, { @@ -640,7 +629,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. What is the number of observations in each regiment and company" + "### What is the number of observations in each regiment and company" ] }, { @@ -674,7 +663,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. 
Iterate over a group and print the name and the whole data from the regiment" + "### Iterate over a group and print the name and the whole data from the regiment" ] }, { @@ -714,21 +703,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/04_Apply/.DS_Store b/04_Apply/.DS_Store new file mode 100644 index 000000000..97975120e Binary files /dev/null and b/04_Apply/.DS_Store differ diff --git a/04_Apply/Students_Alcohol_Consumption/Exercises.ipynb b/04_Apply/Students_Alcohol_Consumption/Exercises.ipynb index 052017f1d..17a7f3de4 100644 --- a/04_Apply/Students_Alcohol_Consumption/Exercises.ipynb +++ b/04_Apply/Students_Alcohol_Consumption/Exercises.ipynb @@ -13,37 +13,25 @@ "source": [ "### Introduction:\n", "\n", - "This time you will download a dataset from the UCI.\n", - "\n", - "### Step 1. Import the necessary libraries" + "This time you will download a dataset from the UCI." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/Students_Alcohol_Consumption/student-mat.csv)." + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/Students_Alcohol_Consumption/student-mat.csv)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. 
Assign it to a variable called df." + "### Assign it to a variable called df." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -54,12 +42,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. For the purpose of this exercise slice the dataframe from 'school' until the 'guardian' column" + "### For the purpose of this exercise slice the dataframe from 'school' until the 'guardian' column" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -70,12 +57,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Create a lambda function that will capitalize strings." + "### Create a lambda function that will capitalize strings." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -86,12 +72,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Capitalize both Mjob and Fjob" + "### Capitalize both Mjob and Fjob" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -102,12 +87,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Print the last elements of the data set." + "### Print the last elements of the data set." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -118,12 +102,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Did you notice the original dataframe is still lowercase? Why is that? Fix it and capitalize Mjob and Fjob." + "### Did you notice the original dataframe is still lowercase? Why is that? Fix it and capitalize Mjob and Fjob." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -134,12 +117,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. 
Create a function called majority that returns a boolean value to a new column called legal_drinker (Consider majority as older than 17 years old)" + "### Create a function called majority that returns a boolean value to a new column called legal_drinker (Consider majority as older than 17 years old)" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -148,7 +130,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -159,13 +140,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Multiply every number of the dataset by 10. \n", + "### Multiply every number of the dataset by 10. \n", "##### I know this makes no sense, don't forget it is just an exercise" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -174,7 +154,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -183,23 +162,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/04_Apply/Students_Alcohol_Consumption/Exercises_with_solutions.ipynb b/04_Apply/Students_Alcohol_Consumption/Exercises_with_solutions.ipynb index 41645d028..2bae382ad 100644 --- a/04_Apply/Students_Alcohol_Consumption/Exercises_with_solutions.ipynb +++ b/04_Apply/Students_Alcohol_Consumption/Exercises_with_solutions.ipynb @@ -13,35 +13,21 @@ "source": [ "### Introduction:\n", "\n", - "This time you will download a dataset from the UCI.\n", - "\n", - "### Step 1. 
Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy" + "This time you will download a dataset from the UCI." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/Students_Alcohol_Consumption/student-mat.csv)." + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/Students_Alcohol_Consumption/student-mat.csv)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called df." + "### Assign it to a variable called df." ] }, { @@ -242,7 +228,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. For the purpose of this exercise slice the dataframe from 'school' until the 'guardian' column" + "### For the purpose of this exercise slice the dataframe from 'school' until the 'guardian' column" ] }, { @@ -384,7 +370,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Create a lambda function that will capitalize strings." + "### Create a lambda function that will capitalize strings." ] }, { @@ -402,7 +388,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Capitalize both Mjob and Fjob" + "### Capitalize both Mjob and Fjob" ] }, { @@ -493,7 +479,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Print the last elements of the data set." + "### Print the last elements of the data set." ] }, { @@ -634,7 +620,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Did you notice the original dataframe is still lowercase? Why is that? Fix it and capitalize Mjob and Fjob." + "### Did you notice the original dataframe is still lowercase? Why is that? 
Fix it and capitalize Mjob and Fjob." ] }, { @@ -777,7 +763,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Create a function called majority that returns a boolean value to a new column called legal_drinker (Consider majority as older than 17 years old)" + "### Create a function called majority that returns a boolean value to a new column called legal_drinker (Consider majority as older than 17 years old)" ] }, { @@ -940,7 +926,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Multiply every number of the dataset by 10. \n", + "### Multiply every number of the dataset by 10. \n", "##### I know this makes no sense, don't forget it is just an exercise" ] }, @@ -1190,23 +1176,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/04_Apply/Students_Alcohol_Consumption/Solutions.ipynb b/04_Apply/Students_Alcohol_Consumption/Solutions.ipynb index e7ec9f4a3..151246052 100644 --- a/04_Apply/Students_Alcohol_Consumption/Solutions.ipynb +++ b/04_Apply/Students_Alcohol_Consumption/Solutions.ipynb @@ -13,32 +13,21 @@ "source": [ "### Introduction:\n", "\n", - "This time you will download a dataset from the UCI.\n", - "\n", - "### Step 1. Import the necessary libraries" + "This time you will download a dataset from the UCI." ] }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. 
Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/Students_Alcohol_Consumption/student-mat.csv)." + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/Students_Alcohol_Consumption/student-mat.csv)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called df." + "### Assign it to a variable called df." ] }, { @@ -235,7 +224,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. For the purpose of this exercise slice the dataframe from 'school' until the 'guardian' column" + "### For the purpose of this exercise slice the dataframe from 'school' until the 'guardian' column" ] }, { @@ -374,7 +363,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Create a lambda function that will capitalize strings." + "### Create a lambda function that will capitalize strings." ] }, { @@ -390,7 +379,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Capitalize both Mjob and Fjob" + "### Capitalize both Mjob and Fjob" ] }, { @@ -478,7 +467,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Print the last elements of the data set." + "### Print the last elements of the data set." ] }, { @@ -617,7 +606,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Did you notice the original dataframe is still lowercase? Why is that? Fix it and capitalize Mjob and Fjob." + "### Did you notice the original dataframe is still lowercase? Why is that? Fix it and capitalize Mjob and Fjob." ] }, { @@ -756,7 +745,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. 
Create a function called majority that returns a boolean value to a new column called legal_drinker (Consider majority as older than 17 years old)" + "### Create a function called majority that returns a boolean value to a new column called legal_drinker (Consider majority as older than 17 years old)" ] }, { @@ -910,7 +899,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Multiply every number of the dataset by 10. \n", + "### Multiply every number of the dataset by 10. \n", "##### I know this makes no sense, don't forget it is just an exercise" ] }, @@ -1153,23 +1142,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/04_Apply/US_Crime_Rates/Exercises.ipynb b/04_Apply/US_Crime_Rates/Exercises.ipynb index 27aac665c..aff5505ba 100644 --- a/04_Apply/US_Crime_Rates/Exercises.ipynb +++ b/04_Apply/US_Crime_Rates/Exercises.ipynb @@ -15,37 +15,25 @@ "\n", "This time you will create a data \n", "\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. 
Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/US_Crime_Rates/US_Crime_Rates_1960_2014.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/US_Crime_Rates/US_Crime_Rates_1960_2014.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called crime." + "### Assign it to a variable called crime." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -56,12 +44,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. What is the type of the columns?" + "### What is the type of the columns?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -74,12 +61,11 @@ "source": [ "##### Have you noticed that the type of Year is int64. But pandas has a different type to work with Time Series. Let's see it now.\n", "\n", - "### Step 5. Convert the type of the column Year to datetime64" + "### Convert the type of the column Year to datetime64" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -90,12 +76,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Set the Year column as the index of the dataframe" + "### Set the Year column as the index of the dataframe" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -106,12 +91,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Delete the Total column" + "### Delete the Total column" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -122,14 +106,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. 
Group the year by decades and sum the values\n", + "### Group the year by decades and sum the values\n", "\n", "#### Pay attention to the Population column number, summing this column is a mistake" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false, "scrolled": true @@ -141,12 +124,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. What is the most dangerous decade to live in the US?" + "### What is the most dangerous decade to live in the US?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -155,23 +137,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/04_Apply/US_Crime_Rates/Exercises_with_solutions.ipynb b/04_Apply/US_Crime_Rates/Exercises_with_solutions.ipynb index 6687d0e8d..740776248 100644 --- a/04_Apply/US_Crime_Rates/Exercises_with_solutions.ipynb +++ b/04_Apply/US_Crime_Rates/Exercises_with_solutions.ipynb @@ -17,33 +17,21 @@ "\n", "This time you will create a data \n", "\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. 
Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/US_Crime_Rates/US_Crime_Rates_1960_2014.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/US_Crime_Rates/US_Crime_Rates_1960_2014.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called crime." + "### Assign it to a variable called crime." ] }, { @@ -184,7 +172,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. What is the type of the columns?" + "### What is the type of the columns?" ] }, { @@ -226,7 +214,7 @@ "source": [ "##### Have you noticed that the type of Year is int64. But pandas has a different type to work with Time Series. Let's see it now.\n", "\n", - "### Step 5. Convert the type of the column Year to datetime64" + "### Convert the type of the column Year to datetime64" ] }, { @@ -268,7 +256,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Set the Year column as the index of the dataframe" + "### Set the Year column as the index of the dataframe" ] }, { @@ -426,7 +414,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Delete the Total column" + "### Delete the Total column" ] }, { @@ -569,7 +557,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Group the year by decades and sum the values\n", + "### Group the year by decades and sum the values\n", "\n", "#### Pay attention to the Population column number, summing this column is a mistake" ] @@ -727,7 +715,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. What is the most dangerous decade to live in the US?" + "### What is the most dangerous decade to live in the US?" 
] }, { @@ -763,9 +751,8 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -779,20 +766,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, diff --git a/04_Apply/US_Crime_Rates/Solutions.ipynb b/04_Apply/US_Crime_Rates/Solutions.ipynb index c76d11cdf..dd33bc0ed 100644 --- a/04_Apply/US_Crime_Rates/Solutions.ipynb +++ b/04_Apply/US_Crime_Rates/Solutions.ipynb @@ -15,32 +15,21 @@ "\n", "This time you will create a data \n", "\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, - { - "cell_type": "code", - "execution_count": 95, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/US_Crime_Rates/US_Crime_Rates_1960_2014.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/US_Crime_Rates/US_Crime_Rates_1960_2014.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called crime." + "### Assign it to a variable called crime." ] }, { @@ -179,7 +168,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. 
What is the type of the columns?" + "### What is the type of the columns?" ] }, { @@ -221,7 +210,7 @@ "source": [ "##### Have you noticed that the type of Year is int64. But pandas has a different type to work with Time Series. Let's see it now.\n", "\n", - "### Step 5. Convert the type of the column Year to datetime64" + "### Convert the type of the column Year to datetime64" ] }, { @@ -261,7 +250,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Set the Year column as the index of the dataframe" + "### Set the Year column as the index of the dataframe" ] }, { @@ -418,7 +407,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Delete the Total column" + "### Delete the Total column" ] }, { @@ -560,7 +549,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Group the year by decades and sum the values\n", + "### Group the year by decades and sum the values\n", "\n", "#### Pay attention to the Population column number, summing this column is a mistake" ] @@ -705,7 +694,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. What is the most dangerous decade to live in the US?" + "### What is the most dangerous decade to live in the US?" 
] }, { @@ -741,21 +730,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/05_Merge/.DS_Store b/05_Merge/.DS_Store new file mode 100644 index 000000000..fd84e6616 Binary files /dev/null and b/05_Merge/.DS_Store differ diff --git a/05_Merge/Auto_MPG/Exercises.ipynb b/05_Merge/Auto_MPG/Exercises.ipynb index 1f741a04f..6141f2dd6 100644 --- a/05_Merge/Auto_MPG/Exercises.ipynb +++ b/05_Merge/Auto_MPG/Exercises.ipynb @@ -13,37 +13,25 @@ "source": [ "### Introduction:\n", "\n", - "The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)\n", - "\n", - "### Step 1. Import the necessary libraries" + "The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv). " + "### Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv). 
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - " ### Step 3. Assign each to a variable called cars1 and cars2" + " ### Assign each to a variable called cars1 and cars2" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -54,12 +42,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1" + "### Oops, it seems our first dataset has some unnamed blank columns, fix cars1" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -70,12 +57,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. What is the number of observations in each dataset?" + "### What is the number of observations in each dataset?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -86,12 +72,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Join cars1 and cars2 into a single DataFrame called cars" + "### Join cars1 and cars2 into a single DataFrame called cars" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -102,12 +87,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000." + "### Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -118,12 +102,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. 
Add the column owners to cars" + "### Add the column owners to cars" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -132,23 +115,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/05_Merge/Auto_MPG/Exercises_with_solutions.ipynb b/05_Merge/Auto_MPG/Exercises_with_solutions.ipynb index f032fd34e..6067afda6 100644 --- a/05_Merge/Auto_MPG/Exercises_with_solutions.ipynb +++ b/05_Merge/Auto_MPG/Exercises_with_solutions.ipynb @@ -15,35 +15,21 @@ "source": [ "### Introduction:\n", "\n", - "The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np" + "The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv). 
" + "### Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - " ### Step 3. Assign each to a to a variable called cars1 and cars2" + " ### Assign each to a to a variable called cars1 and cars2" ] }, { @@ -105,7 +91,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1" + "### Oops, it seems our first dataset has some unnamed blank columns, fix cars1" ] }, { @@ -229,7 +215,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. What is the number of observations in each dataset?" + "### What is the number of observations in each dataset?" ] }, { @@ -257,7 +243,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Join cars1 and cars2 into a single DataFrame called cars" + "### Join cars1 and cars2 into a single DataFrame called cars" ] }, { @@ -1168,7 +1154,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000." + "### Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000." ] }, { @@ -1242,7 +1228,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. 
Add the column owners to cars" + "### Add the column owners to cars" ] }, { @@ -1370,23 +1356,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/05_Merge/Auto_MPG/Solutions.ipynb b/05_Merge/Auto_MPG/Solutions.ipynb index bdf6c8b64..ef03d08c1 100644 --- a/05_Merge/Auto_MPG/Solutions.ipynb +++ b/05_Merge/Auto_MPG/Solutions.ipynb @@ -15,35 +15,21 @@ "source": [ "### Introduction:\n", "\n", - "The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np" + "The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv). " + "### Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv). 
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - " ### Step 3. Assign each to a to a variable called cars1 and cars2" + " ### Assign each to a to a variable called cars1 and cars2" ] }, { @@ -99,7 +85,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1" + "### Oops, it seems our first dataset has some unnamed blank columns, fix cars1" ] }, { @@ -220,7 +206,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. What is the number of observations in each dataset?" + "### What is the number of observations in each dataset?" ] }, { @@ -245,7 +231,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Join cars1 and cars2 into a single DataFrame called cars" + "### Join cars1 and cars2 into a single DataFrame called cars" ] }, { @@ -1153,7 +1139,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000." + "### Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000." ] }, { @@ -1224,7 +1210,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. 
Add the column owners to cars" + "### Add the column owners to cars" ] }, { @@ -1349,23 +1335,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/05_Merge/Fictitous Names/Exercises.ipynb b/05_Merge/Fictitous Names/Exercises.ipynb index 4382d9d95..a76b2e5b3 100644 --- a/05_Merge/Fictitous Names/Exercises.ipynb +++ b/05_Merge/Fictitous Names/Exercises.ipynb @@ -18,32 +18,24 @@ "Special thanks to [Chris Albon](http://chrisalbon.com/) for sharing the dataset and materials.\n", "All the credits to this exercise belongs to him. \n", "\n", - "In order to understand about it go [here](https://blog.codinghorror.com/a-visual-explanation-of-sql-joins/).\n", - "\n", - "### Step 1. Import the necessary libraries" + "In order to understand about it go [here](https://blog.codinghorror.com/a-visual-explanation-of-sql-joins/)." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create the 3 DataFrames based on the following raw data" + "### Create the 3 DataFrames based on the following raw data" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -66,14 +58,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. 
Assign each to a variable called data1, data2, data3" + "### Assign each to a variable called data1, data2, data3" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [] @@ -82,14 +77,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Join the two dataframes along rows and assign all_data" + "### Join the two dataframes along rows and assign all_data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [] @@ -98,14 +96,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Join the two dataframes along columns and assing to all_data_col" + "### Join the two dataframes along columns and assing to all_data_col" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [] @@ -114,14 +115,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Print data3" + "### Print data3" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [] @@ -130,14 +134,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Merge all_data and data3 along the subject_id value" + "### Merge all_data and data3 along the subject_id value" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [] @@ -146,14 +153,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. 
Merge only the data that has the same 'subject_id' on both data1 and data2" + "### Merge only the data that has the same 'subject_id' on both data1 and data2" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [] @@ -162,14 +172,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Merge all values in data1 and data2, with matching records from both sides where available." + "### Merge all values in data1 and data2, with matching records from both sides where available." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [] @@ -177,23 +190,23 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 4 } diff --git a/05_Merge/Fictitous Names/Exercises_with_solutions.ipynb b/05_Merge/Fictitous Names/Exercises_with_solutions.ipynb index a4d5a6c7c..a01e36a9c 100644 --- a/05_Merge/Fictitous Names/Exercises_with_solutions.ipynb +++ b/05_Merge/Fictitous Names/Exercises_with_solutions.ipynb @@ -20,25 +20,14 @@ "Special thanks to [Chris Albon](http://chrisalbon.com/) for sharing the dataset and materials.\n", "All the credits to this exercise belongs to him. \n", "\n", - "In order to understand about it go to [here](https://blog.codinghorror.com/a-visual-explanation-of-sql-joins/).\n", - "\n", - "### Step 1. 
Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" + "In order to understand about it go to [here](https://blog.codinghorror.com/a-visual-explanation-of-sql-joins/)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create the 3 DataFrames based on the following raw data" + "### Create the 3 DataFrames based on the following raw data" ] }, { @@ -68,7 +57,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign each to a variable called data1, data2, data3" + "### Assign each to a variable called data1, data2, data3" ] }, { @@ -174,7 +163,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Join the two dataframes along rows and assign all_data" + "### Join the two dataframes along rows and assign all_data" ] }, { @@ -288,7 +277,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Join the two dataframes along columns and assing to all_data_col" + "### Join the two dataframes along columns and assing to all_data_col" ] }, { @@ -385,7 +374,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Print data3" + "### Print data3" ] }, { @@ -487,7 +476,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Merge all_data and data3 along the subject_id value" + "### Merge all_data and data3 along the subject_id value" ] }, { @@ -603,7 +592,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Merge only the data that has the same 'subject_id' on both data1 and data2" + "### Merge only the data that has the same 'subject_id' on both data1 and data2" ] }, { @@ -666,7 +655,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Merge all values in data1 and data2, with matching records from both sides where available." 
+ "### Merge all values in data1 and data2, with matching records from both sides where available." ] }, { @@ -782,7 +771,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -796,20 +785,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, diff --git a/05_Merge/Fictitous Names/Solutions.ipynb b/05_Merge/Fictitous Names/Solutions.ipynb index c21531f1a..f81887f2f 100644 --- a/05_Merge/Fictitous Names/Solutions.ipynb +++ b/05_Merge/Fictitous Names/Solutions.ipynb @@ -18,25 +18,14 @@ "Special thanks to [Chris Albon](http://chrisalbon.com/) for sharing the dataset and materials.\n", "All the credits to this exercise belongs to him. \n", "\n", - "In order to understand about it go to [here](https://blog.codinghorror.com/a-visual-explanation-of-sql-joins/).\n", - "\n", - "### Step 1. Import the necessary libraries" + "In order to understand about it go to [here](https://blog.codinghorror.com/a-visual-explanation-of-sql-joins/)." ] }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create the 3 DataFrames based on the following raw data" + "### Create the 3 DataFrames based on the following raw data" ] }, { @@ -66,7 +55,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. 
Assign each to a variable called data1, data2, data3" + "### Assign each to a variable called data1, data2, data3" ] }, { @@ -168,7 +157,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Join the two dataframes along rows and assign all_data" + "### Join the two dataframes along rows and assign all_data" ] }, { @@ -281,7 +270,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Join the two dataframes along columns and assing to all_data_col" + "### Join the two dataframes along columns and assing to all_data_col" ] }, { @@ -377,7 +366,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Print data3" + "### Print data3" ] }, { @@ -479,7 +468,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Merge all_data and data3 along the subject_id value" + "### Merge all_data and data3 along the subject_id value" ] }, { @@ -595,7 +584,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Merge only the data that has the same 'subject_id' on both data1 and data2" + "### Merge only the data that has the same 'subject_id' on both data1 and data2" ] }, { @@ -658,7 +647,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Merge all values in data1 and data2, with matching records from both sides where available." + "### Merge all values in data1 and data2, with matching records from both sides where available." 
] }, { @@ -774,21 +763,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/05_Merge/Housing Market/Exercises.ipynb b/05_Merge/Housing Market/Exercises.ipynb index 02e044d9b..810509cf2 100644 --- a/05_Merge/Housing Market/Exercises.ipynb +++ b/05_Merge/Housing Market/Exercises.ipynb @@ -13,25 +13,14 @@ "source": [ "### Introduction:\n", "\n", - "This time we will create our own dataset with fictional numbers to describe a house market. As we are going to create random data don't try to reason of the numbers.\n", - "\n", - "### Step 1. Import the necessary libraries" + "This time we will create our own dataset with fictional numbers to describe a house market. As we are going to create random data don't try to reason of the numbers." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create 3 differents Series, each of length 100, as follows: \n", + "### Create 3 differents Series, each of length 100, as follows: \n", "1. The first a random number from 1 to 4 \n", "2. The second a random number from 1 to 3\n", "3. The third a random number from 10,000 to 30,000" @@ -39,7 +28,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -50,12 +38,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. 
Let's create a DataFrame by joinning the Series by column" + "### Let's create a DataFrame by joinning the Series by column" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -66,12 +53,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Change the name of the columns to bedrs, bathrs, price_sqr_meter" + "### Change the name of the columns to bedrs, bathrs, price_sqr_meter" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -82,12 +68,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Create a one column DataFrame with the values of the 3 Series and assign it to 'bigcolumn'" + "### Create a one column DataFrame with the values of the 3 Series and assign it to 'bigcolumn'" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -98,12 +83,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Oops, it seems it is going only until index 99. Is it true?" + "### Oops, it seems it is going only until index 99. Is it true?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -114,12 +98,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. 
Reindex the DataFrame so it goes from 0 to 299" + "### Reindex the DataFrame so it goes from 0 to 299" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -129,21 +112,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/05_Merge/Housing Market/Exercises_with_solutions.ipynb b/05_Merge/Housing Market/Exercises_with_solutions.ipynb index 1d33c0824..d21819df8 100644 --- a/05_Merge/Housing Market/Exercises_with_solutions.ipynb +++ b/05_Merge/Housing Market/Exercises_with_solutions.ipynb @@ -13,28 +13,14 @@ "source": [ "### Introduction:\n", "\n", - "This time we will create our own dataset with fictional numbers to describe a house market. As we are going to create random data don't try to reason of the numbers.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np" + "This time we will create our own dataset with fictional numbers to describe a house market. As we are going to create random data don't try to reason of the numbers." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create 3 differents Series, each of length 100, as follows: \n", + "### Create 3 differents Series, each of length 100, as follows: \n", "1. The first a random number from 1 to 4 \n", "2. The second a random number from 1 to 3\n", "3. 
The third a random number from 10,000 to 30,000" @@ -250,7 +236,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Let's create a DataFrame by joinning the Series by column" + "### Let's create a DataFrame by joinning the Series by column" ] }, { @@ -331,7 +317,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Change the name of the columns to bedrs, bathrs, price_sqr_meter" + "### Change the name of the columns to bedrs, bathrs, price_sqr_meter" ] }, { @@ -412,7 +398,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Create a one column DataFrame with the values of the 3 Series and assign it to 'bigcolumn'" + "### Create a one column DataFrame with the values of the 3 Series and assign it to 'bigcolumn'" ] }, { @@ -777,7 +763,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Oops, it seems it is going only until index 99. Is it true?" + "### Oops, it seems it is going only until index 99. Is it true?" ] }, { @@ -807,7 +793,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. 
Reindex the DataFrame so it goes from 0 to 299" + "### Reindex the DataFrame so it goes from 0 to 299" ] }, { @@ -1158,21 +1144,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/05_Merge/Housing Market/Solutions.ipynb b/05_Merge/Housing Market/Solutions.ipynb index 07101612c..c2a941d43 100644 --- a/05_Merge/Housing Market/Solutions.ipynb +++ b/05_Merge/Housing Market/Solutions.ipynb @@ -13,28 +13,14 @@ "source": [ "### Introduction:\n", "\n", - "This time we will create our own dataset with fictional numbers to describe a house market. As we are going to create random data don't try to reason of the numbers.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np" + "This time we will create our own dataset with fictional numbers to describe a house market. As we are going to create random data don't try to reason of the numbers." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create 3 differents Series, each of length 100, as follows: \n", + "### Create 3 differents Series, each of length 100, as follows: \n", "1. The first a random number from 1 to 4 \n", "2. The second a random number from 1 to 3\n", "3. The third a random number from 10,000 to 30,000" @@ -244,7 +230,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. 
Let's create a DataFrame by joinning the Series by column" + "### Let's create a DataFrame by joinning the Series by column" ] }, { @@ -322,7 +308,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Change the name of the columns to bedrs, bathrs, price_sqr_meter" + "### Change the name of the columns to bedrs, bathrs, price_sqr_meter" ] }, { @@ -400,7 +386,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Create a one column DataFrame with the values of the 3 Series and assign it to 'bigcolumn'" + "### Create a one column DataFrame with the values of the 3 Series and assign it to 'bigcolumn'" ] }, { @@ -756,7 +742,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Oops, it seems it is going only until index 99. Is it true?" + "### Oops, it seems it is going only until index 99. Is it true?" ] }, { @@ -785,7 +771,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Reindex the DataFrame so it goes from 0 to 299" + "### Reindex the DataFrame so it goes from 0 to 299" ] }, { @@ -1133,21 +1119,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/06_Stats/.DS_Store b/06_Stats/.DS_Store new file mode 100644 index 000000000..cb72c511e Binary files /dev/null and b/06_Stats/.DS_Store differ diff --git a/06_Stats/US_Baby_Names/Exercises.ipynb b/06_Stats/US_Baby_Names/Exercises.ipynb index f9f79e7ae..74370b5b2 100644 --- a/06_Stats/US_Baby_Names/Exercises.ipynb +++ b/06_Stats/US_Baby_Names/Exercises.ipynb @@ -14,38 +14,25 @@ "### 
Introduction:\n", "\n", "We are going to use a subset of [US Baby Names](https://www.kaggle.com/kaggle/us-baby-names) from Kaggle. \n", - "In the file it will be names from 2004 until 2014\n", - "\n", - "\n", - "### Step 1. Import the necessary libraries" + "In the file it will be names from 2004 until 2014" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/US_Baby_Names/US_Baby_Names_right.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/US_Baby_Names/US_Baby_Names_right.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called baby_names." + "### Assign it to a variable called baby_names." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -56,12 +43,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. See the first 10 entries" + "### See the first 10 entries" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -72,12 +58,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Delete the column 'Unnamed: 0' and 'Id'" + "### Delete the column 'Unnamed: 0' and 'Id'" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -88,12 +73,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Is there more male or female names in the dataset?" + "### Is there more male or female names in the dataset?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -104,12 +88,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. 
Group the dataset by name and assign to names" + "### Group the dataset by name and assign to names" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -120,12 +103,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. How many different names exist in the dataset?" + "### How many different names exist in the dataset?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -136,12 +118,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. What is the name with most occurrences?" + "### What is the name with most occurrences?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -152,12 +133,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. How many different names have the least occurrences?" + "### How many different names have the least occurrences?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -168,12 +148,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. What is the median name occurrence?" + "### What is the median name occurrence?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -184,12 +163,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. What is the standard deviation of names?" + "### What is the standard deviation of names?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -200,12 +178,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Get a summary with the mean, min, max, std and quartiles." + "### Get a summary with the mean, min, max, std and quartiles." 
] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -214,23 +191,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/06_Stats/US_Baby_Names/Exercises_with_solutions.ipynb b/06_Stats/US_Baby_Names/Exercises_with_solutions.ipynb index f83fecad6..55f403378 100644 --- a/06_Stats/US_Baby_Names/Exercises_with_solutions.ipynb +++ b/06_Stats/US_Baby_Names/Exercises_with_solutions.ipynb @@ -16,33 +16,21 @@ "### Introduction:\n", "\n", "We are going to use a subset of [US Baby Names](https://www.kaggle.com/kaggle/us-baby-names) from Kaggle. \n", - "In the file it will be names from 2004 until 2014\n", - "\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" + "In the file it will be names from 2004 until 2014" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/US_Baby_Names/US_Baby_Names_right.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/US_Baby_Names/US_Baby_Names_right.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called baby_names." + "### Assign it to a variable called baby_names." 
] }, { @@ -78,7 +66,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. See the first 10 entries" + "### See the first 10 entries" ] }, { @@ -235,7 +223,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Delete the column 'Unnamed: 0' and 'Id'" + "### Delete the column 'Unnamed: 0' and 'Id'" ] }, { @@ -331,7 +319,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Are there more male or female names in the dataset?" + "### Are there more male or female names in the dataset?" ] }, { @@ -360,7 +348,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Group the dataset by name and assign to names" + "### Group the dataset by name and assign to names" ] }, { @@ -451,7 +439,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. How many different names exist in the dataset?" + "### How many different names exist in the dataset?" ] }, { @@ -480,7 +468,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. What is the name with most occurrences?" + "### What is the name with most occurrences?" ] }, { @@ -511,7 +499,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. How many different names have the least occurrences?" + "### How many different names have the least occurrences?" ] }, { @@ -538,7 +526,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. What is the median name occurrence?" + "### What is the median name occurrence?" ] }, { @@ -892,7 +880,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. What is the standard deviation of names?" + "### What is the standard deviation of names?" ] }, { @@ -919,7 +907,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Get a summary with the mean, min, max, std and quartiles." + "### Get a summary with the mean, min, max, std and quartiles." 
] }, { @@ -998,9 +986,8 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1014,20 +1001,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, diff --git a/06_Stats/US_Baby_Names/Solutions.ipynb b/06_Stats/US_Baby_Names/Solutions.ipynb index 9acac382e..3aa5c9e83 100644 --- a/06_Stats/US_Baby_Names/Solutions.ipynb +++ b/06_Stats/US_Baby_Names/Solutions.ipynb @@ -14,33 +14,21 @@ "### Introduction:\n", "\n", "We are going to use a subset of [US Baby Names](https://www.kaggle.com/kaggle/us-baby-names) from Kaggle. \n", - "In the file it will be names from 2004 until 2014\n", - "\n", - "\n", - "### Step 1. Import the necessary libraries" + "In the file it will be names from 2004 until 2014" ] }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/US_Baby_Names/US_Baby_Names_right.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/US_Baby_Names/US_Baby_Names_right.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called baby_names." + "### Assign it to a variable called baby_names." 
] }, { @@ -75,7 +63,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. See the first 10 entries" + "### See the first 10 entries" ] }, { @@ -232,7 +220,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Delete the column 'Unnamed: 0' and 'Id'" + "### Delete the column 'Unnamed: 0' and 'Id'" ] }, { @@ -322,7 +310,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Are there more male or female names in the dataset?" + "### Are there more male or female names in the dataset?" ] }, { @@ -351,7 +339,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Group the dataset by name and assign to names" + "### Group the dataset by name and assign to names" ] }, { @@ -429,7 +417,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. How many different names exist in the dataset?" + "### How many different names exist in the dataset?" ] }, { @@ -456,7 +444,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. What is the name with most occurrences?" + "### What is the name with most occurrences?" ] }, { @@ -483,7 +471,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. How many different names have the least occurrences?" + "### How many different names have the least occurrences?" ] }, { @@ -510,7 +498,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. What is the median name occurrence?" + "### What is the median name occurrence?" ] }, { @@ -864,7 +852,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. What is the standard deviation of names?" + "### What is the standard deviation of names?" ] }, { @@ -891,7 +879,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Get a summary with the mean, min, max, std and quartiles." + "### Get a summary with the mean, min, max, std and quartiles." 
] }, { @@ -970,23 +958,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/06_Stats/Wind_Stats/Exercises.ipynb b/06_Stats/Wind_Stats/Exercises.ipynb index d62c91be9..ff900bb17 100644 --- a/06_Stats/Wind_Stats/Exercises.ipynb +++ b/06_Stats/Wind_Stats/Exercises.ipynb @@ -67,36 +67,24 @@ "metadata": { "collapsed": false }, - "source": [ - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/Wind_Stats/wind.data)" + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/Wind_Stats/wind.data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called data and replace the first 3 columns by a proper datetime index." + "### Assign it to a variable called data and replace the first 3 columns by a proper datetime index." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -107,12 +95,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Year 2061? Do we really have data from this year? Create a function to fix it and apply it." + "### Year 2061? Do we really have data from this year? 
Create a function to fix it and apply it." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -123,12 +110,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Set the right dates as the index. Pay attention at the data type, it should be datetime64[ns]." + "### Set the right dates as the index. Pay attention at the data type, it should be datetime64[ns]." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -139,13 +125,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Compute how many values are missing for each location over the entire record. \n", + "### Compute how many values are missing for each location over the entire record. \n", "#### They should be ignored in all calculations below. " ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -156,12 +141,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Compute how many non-missing values there are in total." + "### Compute how many non-missing values there are in total." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false, "scrolled": true @@ -173,13 +157,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Calculate the mean windspeeds of the windspeeds over all the locations and all the times.\n", + "### Calculate the mean windspeeds of the windspeeds over all the locations and all the times.\n", "#### A single number for the entire dataset." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -190,14 +173,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. 
Create a DataFrame called loc_stats and calculate the min, max and mean windspeeds and standard deviations of the windspeeds at each location over all the days \n", + "### Create a DataFrame called loc_stats and calculate the min, max and mean windspeeds and standard deviations of the windspeeds at each location over all the days \n", "\n", "#### A different set of numbers for each location." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -208,14 +190,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Create a DataFrame called day_stats and calculate the min, max and mean windspeed and standard deviations of the windspeeds across all the locations at each day.\n", + "### Create a DataFrame called day_stats and calculate the min, max and mean windspeed and standard deviations of the windspeeds across all the locations at each day.\n", "\n", "#### A different set of numbers for each day." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -226,13 +207,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Find the average windspeed in January for each location. \n", + "### Find the average windspeed in January for each location. \n", "#### Treat January 1961 and January 1962 both as January." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -243,12 +223,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. Downsample the record to a yearly frequency for each location." + "### Downsample the record to a yearly frequency for each location." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -259,12 +238,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Downsample the record to a monthly frequency for each location." + "### Downsample the record to a monthly frequency for each location." 
] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -275,12 +253,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. Downsample the record to a weekly frequency for each location." + "### Downsample the record to a weekly frequency for each location." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -291,12 +268,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 15. Calculate the min, max and mean windspeeds and standard deviations of the windspeeds across all locations for each week (assume that the first week starts on January 2 1961) for the first 52 weeks." + "### Calculate the min, max and mean windspeeds and standard deviations of the windspeeds across all locations for each week (assume that the first week starts on January 2 1961) for the first 52 weeks." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -305,23 +281,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/06_Stats/Wind_Stats/Exercises_with_solutions.ipynb b/06_Stats/Wind_Stats/Exercises_with_solutions.ipynb index 7fab5059f..ca1a3cef1 100644 --- a/06_Stats/Wind_Stats/Exercises_with_solutions.ipynb +++ b/06_Stats/Wind_Stats/Exercises_with_solutions.ipynb @@ -65,32 +65,20 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "### Step 1. 
Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import datetime" - ] + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://github.com/guipsamora/pandas_exercises/blob/master/06_Stats/Wind_Stats/wind.data)" + "### Import the dataset from this [address](https://github.com/guipsamora/pandas_exercises/blob/master/06_Stats/Wind_Stats/wind.data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called data and replace the first 3 columns by a proper datetime index." + "### Assign it to a variable called data and replace the first 3 columns by a proper datetime index." ] }, { @@ -251,7 +239,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Year 2061? Do we really have data from this year? Create a function to fix it and apply it." + "### Year 2061? Do we really have data from this year? Create a function to fix it and apply it." ] }, { @@ -420,7 +408,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Set the right dates as the index. Pay attention at the data type, it should be datetime64[ns]." + "### Set the right dates as the index. Pay attention at the data type, it should be datetime64[ns]." ] }, { @@ -596,7 +584,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Compute how many values are missing for each location over the entire record. \n", + "### Compute how many values are missing for each location over the entire record. \n", "#### They should be ignored in all calculations below. " ] }, @@ -637,7 +625,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Compute how many non-missing values there are in total." + "### Compute how many non-missing values there are in total." 
] }, { @@ -683,7 +671,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Calculate the mean windspeeds of the windspeeds over all the locations and all the times.\n", + "### Calculate the mean windspeeds of the windspeeds over all the locations and all the times.\n", "#### A single number for the entire dataset." ] }, @@ -711,7 +699,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Create a DataFrame called loc_stats and calculate the min, max and mean windspeeds and standard deviations of the windspeeds at each location over all the days \n", + "### Create a DataFrame called loc_stats and calculate the min, max and mean windspeeds and standard deviations of the windspeeds at each location over all the days \n", "\n", "#### A different set of numbers for each location." ] @@ -890,7 +878,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Create a DataFrame called day_stats and calculate the min, max and mean windspeed and standard deviations of the windspeeds across all the locations at each day.\n", + "### Create a DataFrame called day_stats and calculate the min, max and mean windspeed and standard deviations of the windspeeds across all the locations at each day.\n", "\n", "#### A different set of numbers for each day." ] @@ -1006,7 +994,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Find the average windspeed in January for each location. \n", + "### Find the average windspeed in January for each location. \n", "#### Treat January 1961 and January 1962 both as January." ] }, @@ -1046,7 +1034,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. Downsample the record to a yearly frequency for each location." + "### Downsample the record to a yearly frequency for each location." ] }, { @@ -1436,7 +1424,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Downsample the record to a monthly frequency for each location." 
+ "### Downsample the record to a monthly frequency for each location." ] }, { @@ -2560,7 +2548,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. Downsample the record to a weekly frequency for each location." + "### Downsample the record to a weekly frequency for each location." ] }, { @@ -3735,7 +3723,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 15. Calculate the min, max and mean windspeeds and standard deviations of the windspeeds across all locations for each week (assume that the first week starts on January 2 1961) for the first 52 weeks." + "### Calculate the min, max and mean windspeeds and standard deviations of the windspeeds across all locations for each week (assume that the first week starts on January 2 1961) for the first 52 weeks." ] }, { @@ -4132,9 +4120,8 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -4148,20 +4135,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, diff --git a/06_Stats/Wind_Stats/Solutions.ipynb b/06_Stats/Wind_Stats/Solutions.ipynb index 8f23339b4..b1381e0e7 100644 --- a/06_Stats/Wind_Stats/Solutions.ipynb +++ b/06_Stats/Wind_Stats/Solutions.ipynb @@ -63,29 +63,20 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. 
Import the dataset from this [address](https://github.com/guipsamora/pandas_exercises/blob/master/06_Stats/Wind_Stats/wind.data)" + "### Import the dataset from this [address](https://github.com/guipsamora/pandas_exercises/blob/master/06_Stats/Wind_Stats/wind.data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called data and replace the first 3 columns by a proper datetime index." + "### Assign it to a variable called data and replace the first 3 columns by a proper datetime index." ] }, { @@ -228,7 +219,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Year 2061? Do we really have data from this year? Create a function to fix it and apply it." + "### Year 2061? Do we really have data from this year? Create a function to fix it and apply it." ] }, { @@ -371,7 +362,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Set the right dates as the index. Pay attention at the data type, it should be datetime64[ns]." + "### Set the right dates as the index. Pay attention at the data type, it should be datetime64[ns]." ] }, { @@ -525,7 +516,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Compute how many values are missing for each location over the entire record. \n", + "### Compute how many values are missing for each location over the entire record. \n", "#### They should be ignored in all calculations below. " ] }, @@ -563,7 +554,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Compute how many non-missing values there are in total." + "### Compute how many non-missing values there are in total." ] }, { @@ -602,7 +593,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. 
Calculate the mean windspeeds of the windspeeds over all the locations and all the times.\n", + "### Calculate the mean windspeeds of the windspeeds over all the locations and all the times.\n", "#### A single number for the entire dataset." ] }, @@ -628,7 +619,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Create a DataFrame called loc_stats and calculate the min, max and mean windspeeds and standard deviations of the windspeeds at each location over all the days \n", + "### Create a DataFrame called loc_stats and calculate the min, max and mean windspeeds and standard deviations of the windspeeds at each location over all the days \n", "\n", "#### A different set of numbers for each location." ] @@ -800,7 +791,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Create a DataFrame called day_stats and calculate the min, max and mean windspeed and standard deviations of the windspeeds across all the locations at each day.\n", + "### Create a DataFrame called day_stats and calculate the min, max and mean windspeed and standard deviations of the windspeeds across all the locations at each day.\n", "\n", "#### A different set of numbers for each day." ] @@ -892,7 +883,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Find the average windspeed in January for each location. \n", + "### Find the average windspeed in January for each location. \n", "#### Treat January 1961 and January 1962 both as January." ] }, @@ -930,7 +921,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. Downsample the record to a yearly frequency for each location." + "### Downsample the record to a yearly frequency for each location." ] }, { @@ -1305,7 +1296,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Downsample the record to a monthly frequency for each location." + "### Downsample the record to a monthly frequency for each location." 
] }, { @@ -2414,7 +2405,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. Downsample the record to a weekly frequency for each location." + "### Downsample the record to a weekly frequency for each location." ] }, { @@ -3587,7 +3578,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 15. Calculate the min, max and mean windspeeds and standard deviations of the windspeeds across all locations for each week (assume that the first week starts on January 2 1961) for the first 52 weeks." + "### Calculate the min, max and mean windspeeds and standard deviations of the windspeeds across all locations for each week (assume that the first week starts on January 2 1961) for the first 52 weeks." ] }, { @@ -3961,9 +3952,8 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -3977,7 +3967,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/07_Visualization/.DS_Store b/07_Visualization/.DS_Store new file mode 100644 index 000000000..70f34f948 Binary files /dev/null and b/07_Visualization/.DS_Store differ diff --git a/07_Visualization/Chipotle/Exercise_with_Solutions.ipynb b/07_Visualization/Chipotle/Exercise_with_Solutions.ipynb index a020818f1..eed30f97c 100644 --- a/07_Visualization/Chipotle/Exercise_with_Solutions.ipynb +++ b/07_Visualization/Chipotle/Exercise_with_Solutions.ipynb @@ -14,37 +14,21 @@ "metadata": {}, "source": [ "This time we are going to pull data directly from the internet.\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. 
Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "from collections import Counter\n", - "\n", - "# set this so the \n", - "%matplotlib inline" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called chipo." + "### Assign it to a variable called chipo." ] }, { @@ -62,7 +46,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. See the first 10 entries" + "### See the first 10 entries" ] }, { @@ -222,7 +206,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Create a histogram of the top 5 items bought" + "### Create a histogram of the top 5 items bought" ] }, { @@ -272,7 +256,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. 
Create a scatterplot with the number of items orderered per order price\n", + "### Create a scatterplot with the number of items orderered per order price\n", "#### Hint: Price should be in the X-axis and Items ordered in the Y-axis" ] }, @@ -331,7 +315,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -339,7 +322,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -353,20 +336,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, diff --git a/07_Visualization/Chipotle/Exercises.ipynb b/07_Visualization/Chipotle/Exercises.ipynb index 544991f0a..6e93b06cd 100644 --- a/07_Visualization/Chipotle/Exercises.ipynb +++ b/07_Visualization/Chipotle/Exercises.ipynb @@ -12,42 +12,25 @@ "metadata": {}, "source": [ "This time we are going to pull data directly from the internet.\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "from collections import Counter\n", - "\n", - "# set this so the graphs open internally\n", - "%matplotlib inline" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. 
Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called chipo." + "### Assign it to a variable called chipo." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -56,12 +39,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. See the first 10 entries" + "### See the first 10 entries" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "scrolled": false }, @@ -72,12 +54,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Create a histogram of the top 5 items bought" + "### Create a histogram of the top 5 items bought" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -86,13 +67,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Create a scatterplot with the number of items orderered per order price\n", + "### Create a scatterplot with the number of items orderered per order price\n", "#### Hint: Price should be in the X-axis and Items ordered in the Y-axis" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -101,12 +81,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. BONUS: Create a question and a graph to answer your own question." + "### BONUS: Create a question and a graph to answer your own question." 
] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -114,7 +93,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -128,7 +107,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/07_Visualization/Chipotle/Solutions.ipynb b/07_Visualization/Chipotle/Solutions.ipynb index 23e7498b3..5b004889a 100644 --- a/07_Visualization/Chipotle/Solutions.ipynb +++ b/07_Visualization/Chipotle/Solutions.ipynb @@ -12,37 +12,21 @@ "metadata": {}, "source": [ "This time we are going to pull data directly from the internet.\n", - "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "from collections import Counter\n", - "\n", - "# set this so the \n", - "%matplotlib inline" + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called chipo." + "### Assign it to a variable called chipo." ] }, { @@ -60,7 +44,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. 
See the first 10 entries" + "### See the first 10 entries" ] }, { @@ -222,7 +206,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Create a histogram of the top 5 items bought" + "### Create a histogram of the top 5 items bought" ] }, { @@ -272,7 +256,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Create a scatterplot with the number of items orderered per order price\n", + "### Create a scatterplot with the number of items orderered per order price\n", "#### Hint: Price should be in the X-axis and Items ordered in the Y-axis" ] }, @@ -331,7 +315,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -339,7 +322,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -353,9 +336,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/07_Visualization/Online_Retail/Exercises.ipynb b/07_Visualization/Online_Retail/Exercises.ipynb index e3289db38..8a73ed238 100644 --- a/07_Visualization/Online_Retail/Exercises.ipynb +++ b/07_Visualization/Online_Retail/Exercises.ipynb @@ -11,38 +11,26 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Introduction:\n", - "\n", - "\n", - "\n", - "### Step 1. Import the necessary libraries" + "### Introduction:" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Online_Retail/Online_Retail.csv). 
" + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Online_Retail/Online_Retail.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called online_rt\n", + "### Assign it to a variable called online_rt\n", "Note: if you receive a utf-8 decode error, set `encoding = 'latin1'` in `pd.read_csv()`." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -51,12 +39,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Create a histogram with the 10 countries that have the most 'Quantity' ordered except UK" + "### Create a histogram with the 10 countries that have the most 'Quantity' ordered except UK" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -65,12 +52,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Exclude negative Quantity entries" + "### Exclude negative Quantity entries" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -79,12 +65,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Create a scatterplot with the Quantity per UnitPrice by CustomerID for the top 3 Countries (except UK)" + "### Create a scatterplot with the Quantity per UnitPrice by CustomerID for the top 3 Countries (except UK)" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -93,19 +78,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Investigate why the previous results look so uninformative.\n", + "### Investigate why the previous results look so uninformative.\n", "\n", "This section might seem a bit tedious to go through. But I've thought of it as some kind of a simulation of problems one might encounter when dealing with data and other people. 
Besides there is a prize at the end (i.e. Section 8).\n", "\n", "(But feel free to jump right ahead into Section 8 if you want; it doesn't require that you finish this section.)\n", "\n", - "#### Step 7.1 Look at the first line of code in Step 6. And try to figure out if it leads to any kind of problem.\n", - "##### Step 7.1.1 Display the first few rows of that DataFrame." + "####1 Look at the first line of code in And try to figure out if it leads to any kind of problem.\n", + "#####1.1 Display the first few rows of that DataFrame." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -114,12 +98,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.1.2 Think about what that piece of code does and display the dtype of `UnitPrice`" + "#####1.2 Think about what that piece of code does and display the dtype of `UnitPrice`" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -128,12 +111,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.1.3 Pull data from `online_rt`for `CustomerID`s 12346.0 and 12347.0." + "#####1.3 Pull data from `online_rt`for `CustomerID`s 12346.0 and 12347.0." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -142,7 +124,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Step 7.2 Reinterpreting the initial problem.\n", + "####2 Reinterpreting the initial problem.\n", "\n", "To reiterate the question that we were dealing with: \n", "\"Create a scatterplot with the Quantity per UnitPrice by CustomerID for the top 3 Countries\"\n", @@ -158,12 +140,11 @@ "Total sales volume (i.e. total quantity sold) or total sales (i.e. revenue).\n", "This exercise goes for sales volume, so let's stick to that.\n", "\n", - "##### Step 7.2.1 Find out the top 3 countries in terms of sales volume." 
+ "#####2.1 Find out the top 3 countries in terms of sales volume." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -172,7 +153,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.2.2 \n", + "#####2.2 \n", "\n", "Now that we have the top 3 countries, we can focus on the rest of the problem: \n", "\"Quantity per UnitPrice by CustomerID\". \n", @@ -184,14 +165,13 @@ "*One axis will represent a Quantity assigned to a given customer. This is easy; we can just plot the total Quantity for each customer. \n", "*The other axis will represent a UnitPrice assigned to a given customer. Remember a single customer can have any number of orders with different prices, so summing up prices isn't quite helpful. Besides it's not quite clear what we mean when we say \"unit price per customer\"; it sounds like price of the customer! A reasonable alternative is that we assign each customer the average amount each has paid per item. So let's settle that question in that manner.\n", "\n", - "#### Step 7.3 Modify, select and plot data\n", - "##### Step 7.3.1 Add a column to online_rt called `Revenue` calculate the revenue (Quantity * UnitPrice) from each sale.\n", + "####3 Modify, select and plot data\n", + "#####3.1 Add a column to online_rt called `Revenue` calculate the revenue (Quantity * UnitPrice) from each sale.\n", "We will use this later to figure out an average price per customer." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -200,12 +180,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.3.2 Group by `CustomerID` and `Country` and find out the average price (`AvgPrice`) each customer spends per unit." + "#####3.2 Group by `CustomerID` and `Country` and find out the average price (`AvgPrice`) each customer spends per unit." 
] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -214,12 +193,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.3.3 Plot" + "#####3.3 Plot" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -228,7 +206,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Step 7.4 What to do now?\n", + "####4 What to do now?\n", "We aren't much better-off than what we started with. The data are still extremely scattered around and don't seem quite informative.\n", "\n", "But we shouldn't despair!\n", @@ -238,12 +216,11 @@ "\n", "So: we should plot the data regardless of `Country` and hopefully see a less scattered graph.\n", "\n", - "##### Step 7.4.1 Plot the data for each `CustomerID` on a single graph" + "#####4.1 Plot the data for each `CustomerID` on a single graph" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -252,12 +229,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.4.2 Zoom in so we can see that curve more clearly" + "#####4.2 Zoom in so we can see that curve more clearly" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -279,7 +255,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -293,7 +268,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -309,7 +283,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -323,7 +296,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -331,7 +303,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -345,7 +317,7 @@ "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/07_Visualization/Online_Retail/Exercises_with_solutions_code.ipynb b/07_Visualization/Online_Retail/Exercises_with_solutions_code.ipynb index 01c15b4f6..516f20e3b 100644 --- a/07_Visualization/Online_Retail/Exercises_with_solutions_code.ipynb +++ b/07_Visualization/Online_Retail/Exercises_with_solutions_code.ipynb @@ -11,48 +11,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Introduction:\n", - "\n", - "\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - } - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "# set the graphs to show in the jupyter notebook\n", - "%matplotlib inline\n", - "\n", - "# set seaborn graphs to a better style\n", - "sns.set(style=\"ticks\")" + "### Introduction:" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Online_Retail/Online_Retail.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Online_Retail/Online_Retail.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called online_rt\n", + "### Assign it to a variable called online_rt\n", "Note: if you receive a utf-8 decode error, set `encoding = 'latin1'` in `pd.read_csv()`." ] }, @@ -190,7 +163,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. 
Create a histogram with the 10 countries that have the most 'Quantity' ordered except UK" + "### Create a histogram with the 10 countries that have the most 'Quantity' ordered except UK" ] }, { @@ -239,7 +212,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Exclude negative Quantity entries" + "### Exclude negative Quantity entries" ] }, { @@ -373,7 +346,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Create a scatterplot with the Quantity per UnitPrice by CustomerID for the top 3 Countries (except UK)" + "### Create a scatterplot with the Quantity per UnitPrice by CustomerID for the top 3 Countries (except UK)" ] }, { @@ -443,14 +416,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Investigate why the previous results look so uninformative.\n", + "### Investigate why the previous results look so uninformative.\n", "\n", "This section might seem a bit tedious to go through. But I've thought of it as some kind of a simulation of problems one might encounter when dealing with data and other people. Besides there is a prize at the end (i.e. Section 8).\n", "\n", "(But feel free to jump right ahead into Section 8 if you want; it doesn't require that you finish this section.)\n", "\n", - "#### Step 7.1 Look at the first line of code in Step 6. And try to figure out if it leads to any kind of problem.\n", - "##### Step 7.1.1 Display the first few rows of that DataFrame." + "####1 Look at the first line of code in And try to figure out if it leads to any kind of problem.\n", + "#####1.1 Display the first few rows of that DataFrame." 
] }, { @@ -558,7 +531,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.1.2 Think about what that piece of code does and display the dtype of `UnitPrice`" + "#####1.2 Think about what that piece of code does and display the dtype of `UnitPrice`" ] }, { @@ -603,7 +576,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.1.3 Pull data from `online_rt`for `CustomerID`s 12346.0 and 12347.0." + "#####1.3 Pull data from `online_rt`for `CustomerID`s 12346.0 and 12347.0." ] }, { @@ -801,7 +774,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Step 7.2 Reinterpreting the initial problem.\n", + "####2 Reinterpreting the initial problem.\n", "\n", "To reiterate the question that we were dealing with: \n", "\"Create a scatterplot with the Quantity per UnitPrice by CustomerID for the top 3 Countries\"\n", @@ -817,7 +790,7 @@ "Total sales volume (i.e. total quantity sold) or total sales (i.e. revenue).\n", "This exercise goes for sales volume, so let's stick to that.\n", "\n", - "##### Step 7.2.1 Find out the top 3 countries in terms of sales volume." + "#####2.1 Find out the top 3 countries in terms of sales volume." ] }, { @@ -852,7 +825,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.2.2 \n", + "#####2.2 \n", "\n", "Now that we have the top 3 countries, we can focus on the rest of the problem: \n", "\"Quantity per UnitPrice by CustomerID\". \n", @@ -864,8 +837,8 @@ "*One axis will represent a Quantity assigned to a given customer. This is easy; we can just plot the total Quantity for each customer. \n", "*The other axis will represent a UnitPrice assigned to a given customer. Remember a single customer can have any number of orders with different prices, so summing up prices isn't quite helpful. Besides it's not quite clear what we mean when we say \"unit price per customer\"; it sounds like price of the customer! 
A reasonable alternative is that we assign each customer the average amount each has paid per item. So let's settle that question in that manner.\n", "\n", - "#### Step 7.3 Modify, select and plot data\n", - "##### Step 7.3.1 Add a column to online_rt called `Revenue` calculate the revenue (Quantity * UnitPrice) from each sale.\n", + "####3 Modify, select and plot data\n", + "#####3.1 Add a column to online_rt called `Revenue` calculate the revenue (Quantity * UnitPrice) from each sale.\n", "We will use this later to figure out an average price per customer." ] }, @@ -1006,7 +979,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.3.2 Group by `CustomerID` and `Country` and find out the average price (`AvgPrice`) each customer spends per unit." + "#####3.2 Group by `CustomerID` and `Country` and find out the average price (`AvgPrice`) each customer spends per unit." ] }, { @@ -1130,7 +1103,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.3.3 Plot" + "#####3.3 Plot" ] }, { @@ -1175,7 +1148,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Step 7.4 What to do now?\n", + "####4 What to do now?\n", "We aren't much better-off than what we started with. 
The data are still extremely scattered around and don't seem quite informative.\n", "\n", "But we shouldn't despair!\n", @@ -1185,7 +1158,7 @@ "\n", "So: we should plot the data regardless of `Country` and hopefully see a less scattered graph.\n", "\n", - "##### Step 7.4.1 Plot the data for each `CustomerID` on a single graph" + "#####4.1 Plot the data for each `CustomerID` on a single graph" ] }, { @@ -1238,7 +1211,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.4.2 Zoom in so we can see that curve more clearly" + "#####4.2 Zoom in so we can see that curve more clearly" ] }, { @@ -1450,7 +1423,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true, "jupyter": { @@ -1462,9 +1434,8 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1478,7 +1449,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/07_Visualization/Online_Retail/Solutions.ipynb b/07_Visualization/Online_Retail/Solutions.ipynb index 1ec98b203..c31e34d1f 100644 --- a/07_Visualization/Online_Retail/Solutions.ipynb +++ b/07_Visualization/Online_Retail/Solutions.ipynb @@ -11,34 +11,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Introduction:\n", - "\n", - "\n", - "\n", - "### Step 1. Import the necessary libraries" + "### Introduction:" ] }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Online_Retail/Online_Retail.csv). 
" + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Online_Retail/Online_Retail.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called online_rt\n", + "### Assign it to a variable called online_rt\n", "Note: if you receive a utf-8 decode error, set `encoding = 'latin1'` in `pd.read_csv()`." ] }, @@ -167,7 +154,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Create a histogram with the 10 countries that have the most 'Quantity' ordered except UK" + "### Create a histogram with the 10 countries that have the most 'Quantity' ordered except UK" ] }, { @@ -196,7 +183,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Exclude negative Quantity entries" + "### Exclude negative Quantity entries" ] }, { @@ -324,7 +311,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Create a scatterplot with the Quantity per UnitPrice by CustomerID for the top 3 Countries (except UK)" + "### Create a scatterplot with the Quantity per UnitPrice by CustomerID for the top 3 Countries (except UK)" ] }, { @@ -363,14 +350,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Investigate why the previous results look so uninformative.\n", + "### Investigate why the previous results look so uninformative.\n", "\n", "This section might seem a bit tedious to go through. But I've thought of it as some kind of a simulation of problems one might encounter when dealing with data and other people. Besides there is a prize at the end (i.e. Section 8).\n", "\n", "(But feel free to jump right ahead into Section 8 if you want; it doesn't require that you finish this section.)\n", "\n", - "#### Step 7.1 Look at the first line of code in Step 6. And try to figure out if it leads to any kind of problem.\n", - "##### Step 7.1.1 Display the first few rows of that DataFrame." 
+ "####1 Look at the first line of code in And try to figure out if it leads to any kind of problem.\n", + "#####1.1 Display the first few rows of that DataFrame." ] }, { @@ -468,7 +455,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.1.2 Think about what that piece of code does and display the dtype of `UnitPrice`" + "#####1.2 Think about what that piece of code does and display the dtype of `UnitPrice`" ] }, { @@ -495,7 +482,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.1.3 Pull data from `online_rt`for `CustomerID`s 12346.0 and 12347.0." + "#####1.3 Pull data from `online_rt`for `CustomerID`s 12346.0 and 12347.0." ] }, { @@ -680,7 +667,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Step 7.2 Reinterpreting the initial problem.\n", + "####2 Reinterpreting the initial problem.\n", "\n", "To reiterate the question that we were dealing with: \n", "\"Create a scatterplot with the Quantity per UnitPrice by CustomerID for the top 3 Countries\"\n", @@ -696,7 +683,7 @@ "Total sales volume (i.e. total quantity sold) or total sales (i.e. revenue).\n", "This exercise goes for sales volume, so let's stick to that.\n", "\n", - "##### Step 7.2.1 Find out the top 3 countries in terms of sales volume." + "#####2.1 Find out the top 3 countries in terms of sales volume." ] }, { @@ -723,7 +710,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.2.2 \n", + "#####2.2 \n", "\n", "Now that we have the top 3 countries, we can focus on the rest of the problem: \n", "\"Quantity per UnitPrice by CustomerID\". \n", @@ -735,8 +722,8 @@ "*One axis will represent a Quantity assigned to a given customer. This is easy; we can just plot the total Quantity for each customer. \n", "*The other axis will represent a UnitPrice assigned to a given customer. Remember a single customer can have any number of orders with different prices, so summing up prices isn't quite helpful. 
Besides it's not quite clear what we mean when we say \"unit price per customer\"; it sounds like price of the customer! A reasonable alternative is that we assign each customer the average amount each has paid per item. So let's settle that question in that manner.\n", "\n", - "#### Step 7.3 Modify, select and plot data\n", - "##### Step 7.3.1 Add a column to online_rt called `Revenue` calculate the revenue (Quantity * UnitPrice) from each sale.\n", + "####3 Modify, select and plot data\n", + "#####3.1 Add a column to online_rt called `Revenue` calculate the revenue (Quantity * UnitPrice) from each sale.\n", "We will use this later to figure out an average price per customer." ] }, @@ -871,7 +858,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.3.2 Group by `CustomerID` and `Country` and find out the average price (`AvgPrice`) each customer spends per unit." + "#####3.2 Group by `CustomerID` and `Country` and find out the average price (`AvgPrice`) each customer spends per unit." ] }, { @@ -983,7 +970,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.3.3 Plot" + "#####3.3 Plot" ] }, { @@ -1012,7 +999,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Step 7.4 What to do now?\n", + "####4 What to do now?\n", "We aren't much better-off than what we started with. 
The data are still extremely scattered around and don't seem quite informative.\n", "\n", "But we shouldn't despair!\n", @@ -1022,7 +1009,7 @@ "\n", "So: we should plot the data regardless of `Country` and hopefully see a less scattered graph.\n", "\n", - "##### Step 7.4.1 Plot the data for each `CustomerID` on a single graph" + "#####4.1 Plot the data for each `CustomerID` on a single graph" ] }, { @@ -1061,7 +1048,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Step 7.4.2 Zoom in so we can see that curve more clearly" + "#####4.2 Zoom in so we can see that curve more clearly" ] }, { @@ -1206,7 +1193,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -1215,23 +1201,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [conda root]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "conda-root-py" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/07_Visualization/Scores/Exercises.ipynb b/07_Visualization/Scores/Exercises.ipynb index d887c2ef2..63241ed71 100644 --- a/07_Visualization/Scores/Exercises.ipynb +++ b/07_Visualization/Scores/Exercises.ipynb @@ -15,25 +15,14 @@ "\n", "This time you will create the data.\n", "\n", - "***Exercise based on [Chris Albon](http://chrisalbon.com/) work, the credits belong to him.***\n", - "\n", - "### Step 1. 
Import the necessary libraries" + "***Exercise based on [Chris Albon](http://chrisalbon.com/) work, the credits belong to him.***" ] }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create the DataFrame that should look like the one below." + "### Create the DataFrame that should look like the one below." ] }, { @@ -129,13 +118,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Create a Scatterplot of preTestScore and postTestScore, with the size of each point determined by age\n", + "### Create a Scatterplot of preTestScore and postTestScore, with the size of each point determined by age\n", "#### Hint: Don't forget to place the labels" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -146,13 +134,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. 
Create a Scatterplot of preTestScore and postTestScore.\n", + "### Create a Scatterplot of preTestScore and postTestScore.\n", "### This time the size should be 4.5 times the postTestScore and the color determined by sex" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -168,7 +155,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -178,21 +164,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/07_Visualization/Scores/Exercises_with_solutions_code.ipynb b/07_Visualization/Scores/Exercises_with_solutions_code.ipynb index 664e94d21..562d09be2 100644 --- a/07_Visualization/Scores/Exercises_with_solutions_code.ipynb +++ b/07_Visualization/Scores/Exercises_with_solutions_code.ipynb @@ -15,31 +15,14 @@ "\n", "This time you will create the data.\n", "\n", - "***Exercise based on [Chris Albon](http://chrisalbon.com/) work, the credits belong to him.***\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "%matplotlib inline" + "***Exercise based on [Chris Albon](http://chrisalbon.com/) work, the credits belong to him.***" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create the DataFrame it should look like below." 
+ "### Create the DataFrame it should look like below." ] }, { @@ -146,7 +129,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Create a Scatterplot of preTestScore and postTestScore, with the size of each point determined by age\n", + "### Create a Scatterplot of preTestScore and postTestScore, with the size of each point determined by age\n", "#### Hint: Don't forget to place the labels" ] }, @@ -191,7 +174,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Create a Scatterplot of preTestScore and postTestScore.\n", + "### Create a Scatterplot of preTestScore and postTestScore.\n", "### This time the size should be 4.5 times the postTestScore and the color determined by sex" ] }, @@ -241,7 +224,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -251,21 +233,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/07_Visualization/Scores/Solutions.ipynb b/07_Visualization/Scores/Solutions.ipynb index cdc25cf69..290fe0400 100644 --- a/07_Visualization/Scores/Solutions.ipynb +++ b/07_Visualization/Scores/Solutions.ipynb @@ -15,31 +15,14 @@ "\n", "This time you will create the data.\n", "\n", - "***Exercise based on [Chris Albon](http://chrisalbon.com/) work, the credits belong to him.***\n", - "\n", - "### Step 1. 
Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "%matplotlib inline" + "***Exercise based on [Chris Albon](http://chrisalbon.com/) work, the credits belong to him.***" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create the DataFrame it should look like below." + "### Create the DataFrame it should look like below." ] }, { @@ -135,7 +118,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Create a Scatterplot of preTestScore and postTestScore, with the size of each point determined by age\n", + "### Create a Scatterplot of preTestScore and postTestScore, with the size of each point determined by age\n", "#### Hint: Don't forget to place the labels" ] }, @@ -173,7 +156,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. 
Create a Scatterplot of preTestScore and postTestScore.\n", + "### Create a Scatterplot of preTestScore and postTestScore.\n", "### This time the size should be 4.5 times the postTestScore and the color determined by sex" ] }, @@ -216,7 +199,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -226,21 +208,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/07_Visualization/Tips/Exercises.ipynb b/07_Visualization/Tips/Exercises.ipynb index 24544569c..eb5800246 100644 --- a/07_Visualization/Tips/Exercises.ipynb +++ b/07_Visualization/Tips/Exercises.ipynb @@ -14,35 +14,25 @@ "### Introduction:\n", "\n", "This exercise was created based on the tutorial and documentation from [Seaborn](https://stanford.edu/~mwaskom/software/seaborn/index.html) \n", - "The dataset being used is tips from Seaborn.\n", - "\n", - "### Step 1. Import the necessary libraries:" + "The dataset being used is tips from Seaborn." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Tips/tips.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Tips/tips.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. 
Assign it to a variable called tips" + "### Assign it to a variable called tips" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -51,12 +41,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Delete the Unnamed 0 column" + "### Delete the Unnamed 0 column" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -65,12 +54,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Plot the total_bill column histogram" + "### Plot the total_bill column histogram" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -79,12 +67,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Create a scatter plot presenting the relationship between total_bill and tip" + "### Create a scatter plot presenting the relationship between total_bill and tip" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -93,13 +80,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Create one image with the relationship of total_bill, tip and size.\n", + "### Create one image with the relationship of total_bill, tip and size.\n", "#### Hint: It is just one function." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -108,12 +94,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Present the relationship between days and total_bill value" + "### Present the relationship between days and total_bill value" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -122,12 +107,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. 
Create a scatter plot with the day as the y-axis and tip as the x-axis, differ the dots by sex" + "### Create a scatter plot with the day as the y-axis and tip as the x-axis, differ the dots by sex" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Create a box plot presenting the total_bill per day differetiation the time (Dinner or Lunch)" + "### Create a box plot presenting the total_bill per day differentiating by the time (Dinner or Lunch)" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Create two histograms of the tip value based for Dinner and Lunch. They must be side by side." + "### Create two histograms of the tip value based for Dinner and Lunch. They must be side by side." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. Create two scatterplots graphs, one for Male and another for Female, presenting the total_bill value and tip relationship, differing by smoker or no smoker\n", + "### Create two scatterplots graphs, one for Male and another for Female, presenting the total_bill value and tip relationship, differing by smoker or no smoker\n", "### They must be side by side." 
] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -184,7 +165,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -193,9 +173,8 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -209,7 +188,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/07_Visualization/Tips/Exercises_with_code_and_solutions.ipynb b/07_Visualization/Tips/Exercises_with_code_and_solutions.ipynb index b78b0f582..81b688fd4 100644 --- a/07_Visualization/Tips/Exercises_with_code_and_solutions.ipynb +++ b/07_Visualization/Tips/Exercises_with_code_and_solutions.ipynb @@ -16,43 +16,21 @@ "### Introduction:\n", "\n", "This exercise was created based on the tutorial and documentation from [Seaborn](https://stanford.edu/~mwaskom/software/seaborn/index.html) \n", - "The dataset being used is tips from Seaborn.\n", - "\n", - "### Step 1. Import the necessary libraries:" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "# visualization libraries\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "\n", - "# print the graphs in the notebook\n", - "% matplotlib inline\n", - "\n", - "# set seaborn style to white\n", - "sns.set_style(\"white\")" + "The dataset being used is tips from Seaborn." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Tips/tips.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Tips/tips.csv). 
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called tips" + "### Assign it to a variable called tips" ] }, { @@ -163,7 +141,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Delete the Unnamed 0 column" + "### Delete the Unnamed 0 column" ] }, { @@ -267,7 +245,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Plot the total_bill column histogram" + "### Plot the total_bill column histogram" ] }, { @@ -301,7 +279,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Create a scatter plot presenting the relationship between total_bill and tip" + "### Create a scatter plot presenting the relationship between total_bill and tip" ] }, { @@ -338,7 +316,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Create one image with the relationship of total_bill, tip and size.\n", + "### Create one image with the relationship of total_bill, tip and size.\n", "#### Hint: It is just one function." ] }, @@ -376,7 +354,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Present the relationship between days and total_bill value" + "### Present the relationship between days and total_bill value" ] }, { @@ -403,7 +381,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Create a scatter plot with the day as the y-axis and tip as the x-axis, differ the dots by sex" + "### Create a scatter plot with the day as the y-axis and tip as the x-axis, differ the dots by sex" ] }, { @@ -430,7 +408,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Create a box plot presenting the total_bill per day differetiation the time (Dinner or Lunch)" + "### Create a box plot presenting the total_bill per day differetiation the time (Dinner or Lunch)" ] }, { @@ -457,7 +435,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. 
Create two histograms of the tip value based for Dinner and Lunch. They must be side by side." + "### Create two histograms of the tip value based for Dinner and Lunch. They must be side by side." ] }, { @@ -489,7 +467,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. Create two scatterplots graphs, one for Male and another for Female, presenting the total_bill value and tip relationship, differing by smoker or no smoker\n", + "### Create two scatterplots graphs, one for Male and another for Female, presenting the total_bill value and tip relationship, differing by smoker or no smoker\n", "### They must be side by side." ] }, @@ -525,7 +503,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -535,7 +512,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -549,20 +526,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, diff --git a/07_Visualization/Tips/Solutions.ipynb b/07_Visualization/Tips/Solutions.ipynb index 96ce17999..d7970a7d5 100644 --- a/07_Visualization/Tips/Solutions.ipynb +++ b/07_Visualization/Tips/Solutions.ipynb @@ -14,43 +14,21 @@ "### Introduction:\n", "\n", "This exercise was created based on the tutorial and documentation from [Seaborn](https://stanford.edu/~mwaskom/software/seaborn/index.html) \n", - "The dataset being used is tips from Seaborn.\n", - "\n", - "### Step 1. 
Import the necessary libraries:" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "# visualization libraries\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "\n", - "# print the graphs in the notebook\n", - "% matplotlib inline\n", - "\n", - "# set seaborn style to white\n", - "sns.set_style(\"white\")" + "The dataset being used is tips from Seaborn." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Tips/tips.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Tips/tips.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called tips" + "### Assign it to a variable called tips" ] }, { @@ -156,7 +134,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Delete the Unnamed 0 column" + "### Delete the Unnamed 0 column" ] }, { @@ -256,7 +234,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Plot the total_bill column histogram" + "### Plot the total_bill column histogram" ] }, { @@ -281,7 +259,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Create a scatter plot presenting the relationship between total_bill and tip" + "### Create a scatter plot presenting the relationship between total_bill and tip" ] }, { @@ -316,7 +294,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Create one image with the relationship of total_bill, tip and size.\n", + "### Create one image with the relationship of total_bill, tip and size.\n", "#### Hint: It is just one function." ] }, @@ -352,7 +330,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. 
Present the relationship between days and total_bill value" + "### Present the relationship between days and total_bill value" ] }, { @@ -377,7 +355,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Create a scatter plot with the day as the y-axis and tip as the x-axis, differ the dots by sex" + "### Create a scatter plot with the day as the y-axis and tip as the x-axis, differ the dots by sex" ] }, { @@ -402,7 +380,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Create a box plot presenting the total_bill per day differetiation the time (Dinner or Lunch)" + "### Create a box plot presenting the total_bill per day differentiating by the time (Dinner or Lunch)" ] }, { @@ -427,7 +405,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Create two histograms of the tip value based for Dinner and Lunch. They must be side by side." + "### Create two histograms of the tip value based for Dinner and Lunch. They must be side by side." ] }, { @@ -452,7 +430,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. Create two scatterplots graphs, one for Male and another for Female, presenting the total_bill value and tip relationship, differing by smoker or no smoker\n", + "### Create two scatterplots graphs, one for Male and another for Female, presenting the total_bill value and tip relationship, differing by smoker or no smoker\n", "### They must be side by side." 
] }, @@ -483,7 +461,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -493,7 +470,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -507,7 +484,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/07_Visualization/Titanic_Desaster/Exercises.ipynb b/07_Visualization/Titanic_Desaster/Exercises.ipynb index 001f52f7c..0d2d87ed4 100644 --- a/07_Visualization/Titanic_Desaster/Exercises.ipynb +++ b/07_Visualization/Titanic_Desaster/Exercises.ipynb @@ -14,36 +14,25 @@ "### Introduction:\n", "\n", "This exercise is based on the titanic Disaster dataset avaiable at [Kaggle](https://www.kaggle.com/c/titanic). \n", - "To know more about the variables check [here](https://www.kaggle.com/c/titanic/data)\n", - "\n", - "\n", - "### Step 1. Import the necessary libraries" + "To know more about the variables check [here](https://www.kaggle.com/c/titanic/data)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Titanic_Desaster/train.csv)" + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Titanic_Desaster/train.csv)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable titanic " + "### Assign it to a variable titanic " ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -52,12 +41,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. 
Set PassengerId as the index " + "### Set PassengerId as the index " ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Create a pie chart presenting the male/female proportion" + "### Create a pie chart presenting the male/female proportion" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Create a scatterplot with the Fare payed and the Age, differ the plot color by gender" + "### Create a scatterplot with the Fare paid and the Age, differ the plot color by gender" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. How many people survived?" + "### How many people survived?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. 
Create a histogram with the Fare payed" + "### Create a histogram with the Fare payed" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -127,7 +111,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -137,21 +120,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.16" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/07_Visualization/Titanic_Desaster/Exercises_code_with_solutions.ipynb b/07_Visualization/Titanic_Desaster/Exercises_code_with_solutions.ipynb index ee7a4619c..e141178b3 100644 --- a/07_Visualization/Titanic_Desaster/Exercises_code_with_solutions.ipynb +++ b/07_Visualization/Titanic_Desaster/Exercises_code_with_solutions.ipynb @@ -16,38 +16,21 @@ "### Introduction:\n", "\n", "This exercise is based on the titanic Disaster dataset avaiable at [Kaggle](https://www.kaggle.com/c/titanic). \n", - "To know more about the variables check [here](https://www.kaggle.com/c/titanic/data)\n", - "\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "import numpy as np\n", - "\n", - "%matplotlib inline" + "To know more about the variables check [here](https://www.kaggle.com/c/titanic/data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. 
Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Titanic_Desaster/train.csv) " + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Titanic_Desaster/train.csv) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable titanic " + "### Assign it to a variable titanic " ] }, { @@ -197,7 +180,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Set PassengerId as the index " + "### Set PassengerId as the index " ] }, { @@ -354,7 +337,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Create a pie chart presenting the male/female proportion" + "### Create a pie chart presenting the male/female proportion" ] }, { @@ -420,7 +403,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Create a scatterplot with the Fare payed and the Age, differ the plot color by gender" + "### Create a scatterplot with the Fare payed and the Age, differ the plot color by gender" ] }, { @@ -466,7 +449,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. How many people survived?" + "### How many people survived?" ] }, { @@ -493,7 +476,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. 
Create a histogram with the Fare payed" + "### Create a histogram with the Fare payed" ] }, { @@ -542,7 +525,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -552,7 +534,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -566,20 +548,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, diff --git a/07_Visualization/Titanic_Desaster/Solutions.ipynb b/07_Visualization/Titanic_Desaster/Solutions.ipynb index 1f6e71784..e025d8539 100644 --- a/07_Visualization/Titanic_Desaster/Solutions.ipynb +++ b/07_Visualization/Titanic_Desaster/Solutions.ipynb @@ -14,38 +14,21 @@ "### Introduction:\n", "\n", "This exercise is based on the titanic Disaster dataset avaiable at [Kaggle](https://www.kaggle.com/c/titanic). \n", - "To know more about the variables check [here](https://www.kaggle.com/c/titanic/data)\n", - "\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "import numpy as np\n", - "\n", - "%matplotlib inline" + "To know more about the variables check [here](https://www.kaggle.com/c/titanic/data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. 
Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Titanic_Desaster/train.csv) " + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Titanic_Desaster/train.csv) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable titanic " + "### Assign it to a variable titanic " ] }, { @@ -189,7 +172,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Set PassengerId as the index " + "### Set PassengerId as the index " ] }, { @@ -344,7 +327,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Create a pie chart presenting the male/female proportion" + "### Create a pie chart presenting the male/female proportion" ] }, { @@ -369,7 +352,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Create a scatterplot with the Fare payed and the Age, differ the plot color by gender" + "### Create a scatterplot with the Fare payed and the Age, differ the plot color by gender" ] }, { @@ -404,7 +387,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. How many people survived?" + "### How many people survived?" ] }, { @@ -429,7 +412,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. 
Create a histogram with the Fare payed" + "### Create a histogram with the Fare payed" ] }, { @@ -459,7 +442,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -469,21 +451,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.16" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/08_Creating_Series_and_DataFrames/Pokemon/Exercises-with-solutions-and-code.ipynb b/08_Creating_Series_and_DataFrames/Pokemon/Exercises-with-solutions-and-code.ipynb index 3345139a1..5467fcc62 100644 --- a/08_Creating_Series_and_DataFrames/Pokemon/Exercises-with-solutions-and-code.ipynb +++ b/08_Creating_Series_and_DataFrames/Pokemon/Exercises-with-solutions-and-code.ipynb @@ -13,29 +13,14 @@ "source": [ "### Introduction:\n", "\n", - "This time you will create the data.\n", - "\n", - "\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import pandas as pd" + "This time you will create the data." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create a data dictionary" + "### Create a data dictionary" ] }, { @@ -58,7 +43,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called pokemon" + "### Assign it to a variable called pokemon" ] }, { @@ -142,7 +127,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Ops...it seems the DataFrame columns are in alphabetical order. 
Place the order of the columns as name, type, hp, evolution, pokedex" + "### Ops...it seems the DataFrame columns are in alphabetical order. Place the order of the columns as name, type, hp, evolution, pokedex" ] }, { @@ -226,7 +211,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Add another column called place, and insert what you have in mind." + "### Add another column called place, and insert what you have in mind." ] }, { @@ -315,7 +300,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Present the type of each column" + "### Present the type of each column" ] }, { @@ -354,7 +339,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -364,21 +348,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/08_Creating_Series_and_DataFrames/Pokemon/Exercises.ipynb b/08_Creating_Series_and_DataFrames/Pokemon/Exercises.ipynb index 360204628..6dbe2dd17 100644 --- a/08_Creating_Series_and_DataFrames/Pokemon/Exercises.ipynb +++ b/08_Creating_Series_and_DataFrames/Pokemon/Exercises.ipynb @@ -13,27 +13,14 @@ "source": [ "### Introduction:\n", "\n", - "This time you will create the data.\n", - "\n", - "\n", - "\n", - "### Step 1. Import the necessary libraries" + "This time you will create the data." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. 
Create a data dictionary that looks like the DataFrame below" + "### Create a data dictionary that looks like the DataFrame below" ] }, { @@ -49,7 +36,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called pokemon" + "### Assign it to a variable called pokemon" ] }, { @@ -130,12 +117,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Ops...it seems the DataFrame columns are in alphabetical order. Place the order of the columns as name, type, hp, evolution, pokedex" + "### Ops...it seems the DataFrame columns are in alphabetical order. Place the order of the columns as name, type, hp, evolution, pokedex" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -146,12 +132,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Add another column called place, and insert what you have in mind." + "### Add another column called place, and insert what you have in mind." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -162,12 +147,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. 
Present the type of each column" + "### Present the type of each column" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -183,7 +167,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -193,21 +176,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/08_Creating_Series_and_DataFrames/Pokemon/Solutions.ipynb b/08_Creating_Series_and_DataFrames/Pokemon/Solutions.ipynb index a8cc38601..ce62d7163 100644 --- a/08_Creating_Series_and_DataFrames/Pokemon/Solutions.ipynb +++ b/08_Creating_Series_and_DataFrames/Pokemon/Solutions.ipynb @@ -13,27 +13,14 @@ "source": [ "### Introduction:\n", "\n", - "This time you will create the data.\n", - "\n", - "\n", - "\n", - "### Step 1. Import the necessary libraries" + "This time you will create the data." ] }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create a data dictionary" + "### Create a data dictionary" ] }, { @@ -49,7 +36,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called pokemon" + "### Assign it to a variable called pokemon" ] }, { @@ -130,7 +117,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Ops...it seems the DataFrame columns are in alphabetical order. 
Place the order of the columns as name, type, hp, evolution, pokedex" + "### Ops...it seems the DataFrame columns are in alphabetical order. Place the order of the columns as name, type, hp, evolution, pokedex" ] }, { @@ -211,7 +198,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Add another column called place, and insert what you have in mind." + "### Add another column called place, and insert what you have in mind." ] }, { @@ -297,7 +284,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Present the type of each column" + "### Present the type of each column" ] }, { @@ -334,7 +321,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -344,21 +330,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/09_Time_Series/.DS_Store b/09_Time_Series/.DS_Store new file mode 100644 index 000000000..b65e1af1d Binary files /dev/null and b/09_Time_Series/.DS_Store differ diff --git a/09_Time_Series/Apple_Stock/Exercises-with-solutions-code.ipynb b/09_Time_Series/Apple_Stock/Exercises-with-solutions-code.ipynb index e2bbb9b0a..c10994c55 100644 --- a/09_Time_Series/Apple_Stock/Exercises-with-solutions-code.ipynb +++ b/09_Time_Series/Apple_Stock/Exercises-with-solutions-code.ipynb @@ -15,39 +15,21 @@ "source": [ "### Introduction:\n", "\n", - "We are going to use Apple's stock price.\n", - "\n", - "\n", - "### Step 1. 
Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "\n", - "# visualization\n", - "import matplotlib.pyplot as plt\n", - "\n", - "%matplotlib inline" + "We are going to use Apple's stock price." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv)" + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable apple" + "### Assign it to a variable apple" ] }, { @@ -152,7 +134,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Check out the type of the columns" + "### Check out the type of the columns" ] }, { @@ -186,7 +168,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Transform the Date column as a datetime type" + "### Transform the Date column as a datetime type" ] }, { @@ -220,7 +202,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Set the date as the index" + "### Set the date as the index" ] }, { @@ -328,7 +310,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Is there any duplicate dates?" + "### Is there any duplicate dates?" ] }, { @@ -356,7 +338,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Ops...it seems the index is from the most recent date. Make the first entry the oldest date." + "### Ops...it seems the index is from the most recent date. Make the first entry the oldest date." ] }, { @@ -462,7 +444,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. 
Get the last business day of each month" + "### Get the last business day of each month" ] }, { @@ -570,7 +552,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. What is the difference in days between the first day and the oldest" + "### What is the difference in days between the first day and the oldest" ] }, { @@ -597,7 +579,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. How many months in the data we have?" + "### How many months in the data we have?" ] }, { @@ -626,7 +608,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. Plot the 'Adj Close' value. Set the size of the figure to 13.5 x 9 inches" + "### Plot the 'Adj Close' value. Set the size of the figure to 13.5 x 9 inches" ] }, { @@ -663,18 +645,19 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [] } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -688,22 +671,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, - "nbformat_minor": 1 + "nbformat_minor": 4 } diff --git a/09_Time_Series/Apple_Stock/Exercises.ipynb b/09_Time_Series/Apple_Stock/Exercises.ipynb index ac13ee923..7d92fb7ac 100644 --- a/09_Time_Series/Apple_Stock/Exercises.ipynb +++ b/09_Time_Series/Apple_Stock/Exercises.ipynb @@ -13,146 +13,1890 @@ "source": [ "### Introduction:\n", "\n", - "We are going to use Apple's stock 
price.\n", - "\n", - "\n", - "### Step 1. Import the necessary libraries" + "We are going to use Apple's stock price." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { - "collapsed": false + "tags": [] + }, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid syntax (1805426779.py, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m File \u001b[0;32m\"/var/folders/24/tg28vxls25l9mjvqrnh0plc80000gn/T/ipykernel_37619/1805426779.py\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" + ] + } + ], + "source": [ + "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "tags": [] }, "outputs": [], - "source": [] + "source": [ + "url = 'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv'" + ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 4, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "### Step 2. 
Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv)" + "url" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'HTTPS://RAW.GITHUBUSERCONTENT.COM/GUIPSAMORA/PANDAS_EXERCISES/MASTER/09_TIME_SERIES/APPLE_STOCK/APPL_1980_2014.CSV'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "url.upper()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_time_series/apple_stock/appl_1980_2014.csv'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "url.capitalize()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Https://Raw.Githubusercontent.Com/Guipsamora/Pandas_Exercises/Master/09_Time_Series/Apple_Stock/Appl_1980_2014.Csv'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "url.title()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'str' object has no attribute 'csv'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/24/tg28vxls25l9mjvqrnh0plc80000gn/T/ipykernel_37619/2103389992.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m 
\u001b[0murl\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcsv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m: 'str' object has no attribute 'csv'" + ] + } + ], + "source": [ + "url.csv()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateOpenHighLowCloseVolumeAdj Close
02014-07-0896.2796.8093.9295.356513000095.35
12014-07-0794.1495.9994.1095.975630540095.97
22014-07-0393.6794.1093.2094.032289180094.03
32014-07-0293.8794.0693.0993.482842090093.48
42014-07-0193.5294.0793.1393.523817020093.52
........................
84601980-12-1826.6326.7526.6326.63183624000.41
84611980-12-1725.8726.0025.8725.87216104000.40
84621980-12-1625.3725.3725.2525.25264320000.39
84631980-12-1527.3827.3827.2527.25439712000.42
84641980-12-1228.7528.8728.7528.751172584000.45
\n", + "

8465 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " Date Open High Low Close Volume Adj Close\n", + "0 2014-07-08 96.27 96.80 93.92 95.35 65130000 95.35\n", + "1 2014-07-07 94.14 95.99 94.10 95.97 56305400 95.97\n", + "2 2014-07-03 93.67 94.10 93.20 94.03 22891800 94.03\n", + "3 2014-07-02 93.87 94.06 93.09 93.48 28420900 93.48\n", + "4 2014-07-01 93.52 94.07 93.13 93.52 38170200 93.52\n", + "... ... ... ... ... ... ... ...\n", + "8460 1980-12-18 26.63 26.75 26.63 26.63 18362400 0.41\n", + "8461 1980-12-17 25.87 26.00 25.87 25.87 21610400 0.40\n", + "8462 1980-12-16 25.37 25.37 25.25 25.25 26432000 0.39\n", + "8463 1980-12-15 27.38 27.38 27.25 27.25 43971200 0.42\n", + "8464 1980-12-12 28.75 28.87 28.75 28.75 117258400 0.45\n", + "\n", + "[8465 rows x 7 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pandas.read_csv(filepath_or_buffer=url)" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable apple" + "### Assign it to a variable apple" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(pandas.read_csv(filepath_or_buffer=url))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], - "source": [] + "source": [ + "df_apple = pandas.read_csv(filepath_or_buffer=url)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateOpenHighLowCloseVolumeAdj Close
02014-07-0896.2796.8093.9295.356513000095.35
12014-07-0794.1495.9994.1095.975630540095.97
22014-07-0393.6794.1093.2094.032289180094.03
32014-07-0293.8794.0693.0993.482842090093.48
42014-07-0193.5294.0793.1393.523817020093.52
........................
84601980-12-1826.6326.7526.6326.63183624000.41
84611980-12-1725.8726.0025.8725.87216104000.40
84621980-12-1625.3725.3725.2525.25264320000.39
84631980-12-1527.3827.3827.2527.25439712000.42
84641980-12-1228.7528.8728.7528.751172584000.45
\n", + "

8465 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " Date Open High Low Close Volume Adj Close\n", + "0 2014-07-08 96.27 96.80 93.92 95.35 65130000 95.35\n", + "1 2014-07-07 94.14 95.99 94.10 95.97 56305400 95.97\n", + "2 2014-07-03 93.67 94.10 93.20 94.03 22891800 94.03\n", + "3 2014-07-02 93.87 94.06 93.09 93.48 28420900 93.48\n", + "4 2014-07-01 93.52 94.07 93.13 93.52 38170200 93.52\n", + "... ... ... ... ... ... ... ...\n", + "8460 1980-12-18 26.63 26.75 26.63 26.63 18362400 0.41\n", + "8461 1980-12-17 25.87 26.00 25.87 25.87 21610400 0.40\n", + "8462 1980-12-16 25.37 25.37 25.25 25.25 26432000 0.39\n", + "8463 1980-12-15 27.38 27.38 27.25 27.25 43971200 0.42\n", + "8464 1980-12-12 28.75 28.87 28.75 28.75 117258400 0.45\n", + "\n", + "[8465 rows x 7 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_apple" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Check out the type of the columns" + "### Check out the type of the columns" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "Date object\n", + "Open float64\n", + "High float64\n", + "Low float64\n", + "Close float64\n", + "Volume int64\n", + "Adj Close float64\n", + "dtype: object" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_apple.dtypes" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. 
Transform the Date column as a datetime type" + "### Transform the Date column as a datetime type" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [], + "source": [ + "numpy.datetime64" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "The 'datetime64' dtype has no unit. Please pass in 'datetime64[ns]' instead.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/24/tg28vxls25l9mjvqrnh0plc80000gn/T/ipykernel_37619/178386697.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_apple\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatetime64\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/miniforge3/lib/python3.9/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 5813\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5814\u001b[0m \u001b[0;31m# else, only a single dtype is given\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5815\u001b[0;31m \u001b[0mnew_data\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_mgr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5816\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"astype\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5817\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniforge3/lib/python3.9/site-packages/pandas/core/internals/managers.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 416\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 417\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbool\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"raise\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mT\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 418\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"astype\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 419\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 420\u001b[0m def convert(\n", + "\u001b[0;32m~/miniforge3/lib/python3.9/site-packages/pandas/core/internals/managers.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, f, align_keys, ignore_failures, **kwargs)\u001b[0m\n\u001b[1;32m 325\u001b[0m \u001b[0mapplied\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 326\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 327\u001b[0;31m \u001b[0mapplied\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 328\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mTypeError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 329\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m 
\u001b[0mignore_failures\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniforge3/lib/python3.9/site-packages/pandas/core/internals/blocks.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 589\u001b[0m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 590\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 591\u001b[0;31m \u001b[0mnew_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mastype_array_safe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 592\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 593\u001b[0m \u001b[0mnew_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmaybe_coerce_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_values\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniforge3/lib/python3.9/site-packages/pandas/core/dtypes/cast.py\u001b[0m in \u001b[0;36mastype_array_safe\u001b[0;34m(values, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 1307\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1308\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1309\u001b[0;31m \u001b[0mnew_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mastype_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1310\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mValueError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1311\u001b[0m \u001b[0;31m# e.g. astype_nansafe can fail on object-dtype of strings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniforge3/lib/python3.9/site-packages/pandas/core/dtypes/cast.py\u001b[0m in \u001b[0;36mastype_array\u001b[0;34m(values, dtype, copy)\u001b[0m\n\u001b[1;32m 1255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1256\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1257\u001b[0;31m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mastype_nansafe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1259\u001b[0m \u001b[0;31m# in pandas we don't store numpy str dtypes, so convert to object\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniforge3/lib/python3.9/site-packages/pandas/core/dtypes/cast.py\u001b[0m in \u001b[0;36mastype_nansafe\u001b[0;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[1;32m 1093\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1094\u001b[0m \u001b[0mflat\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1095\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mastype_nansafe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mflat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mskipna\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mskipna\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1096\u001b[0m \u001b[0;31m# error: Item \"ExtensionArray\" of \"Union[ExtensionArray, ndarray]\" has no\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1097\u001b[0m \u001b[0;31m# attribute \"reshape\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniforge3/lib/python3.9/site-packages/pandas/core/dtypes/cast.py\u001b[0m in \u001b[0;36mastype_nansafe\u001b[0;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[1;32m 1195\u001b[0m \u001b[0;34mf\"'{dtype.name}[ns]' instead.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1196\u001b[0m )\n\u001b[0;32m-> 1197\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1199\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcopy\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mis_object_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mis_object_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;31mValueError\u001b[0m: The 'datetime64' dtype has no unit. Please pass in 'datetime64[ns]' instead." + ] + } + ], + "source": [ + "df_apple.astype(dtype=numpy.datetime64)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], - "source": [] + "source": [ + "df_apple.Date = df_apple.Date.astype(dtype=numpy.datetime64)" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Set the date as the index" + "### Set the date as the index" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OpenHighLowCloseVolumeAdj Close
Date
2014-07-0896.2796.8093.9295.356513000095.35
2014-07-0794.1495.9994.1095.975630540095.97
2014-07-0393.6794.1093.2094.032289180094.03
2014-07-0293.8794.0693.0993.482842090093.48
2014-07-0193.5294.0793.1393.523817020093.52
.....................
1980-12-1826.6326.7526.6326.63183624000.41
1980-12-1725.8726.0025.8725.87216104000.40
1980-12-1625.3725.3725.2525.25264320000.39
1980-12-1527.3827.3827.2527.25439712000.42
1980-12-1228.7528.8728.7528.751172584000.45
\n", + "

8465 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " Open High Low Close Volume Adj Close\n", + "Date \n", + "2014-07-08 96.27 96.80 93.92 95.35 65130000 95.35\n", + "2014-07-07 94.14 95.99 94.10 95.97 56305400 95.97\n", + "2014-07-03 93.67 94.10 93.20 94.03 22891800 94.03\n", + "2014-07-02 93.87 94.06 93.09 93.48 28420900 93.48\n", + "2014-07-01 93.52 94.07 93.13 93.52 38170200 93.52\n", + "... ... ... ... ... ... ...\n", + "1980-12-18 26.63 26.75 26.63 26.63 18362400 0.41\n", + "1980-12-17 25.87 26.00 25.87 25.87 21610400 0.40\n", + "1980-12-16 25.37 25.37 25.25 25.25 26432000 0.39\n", + "1980-12-15 27.38 27.38 27.25 27.25 43971200 0.42\n", + "1980-12-12 28.75 28.87 28.75 28.75 117258400 0.45\n", + "\n", + "[8465 rows x 6 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_apple.set_index(keys='Date')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], - "source": [] + "source": [ + "df_apple = df_apple.set_index(keys='Date')" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Is there any duplicate dates?" + "### Is there any duplicate dates?" 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_apple.index.has_duplicates" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, False, False, ..., False, False, False])" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_apple.index.duplicated()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_apple.index.duplicated().sum()" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Ops...it seems the index is from the most recent date. Make the first entry the oldest date." + "### Ops...it seems the index is from the most recent date. Make the first entry the oldest date." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], - "source": [] + "source": [ + "df_apple = df_apple.sort_index()" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. 
Get the last business day of each month" + "### Get the last business day of each month" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'DataFrame' object has no attribute 'Date'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/24/tg28vxls25l9mjvqrnh0plc80000gn/T/ipykernel_37619/2357593726.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_apple\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDate\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/miniforge3/lib/python3.9/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 5485\u001b[0m ):\n\u001b[1;32m 5486\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5487\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5488\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5489\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m 
\u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'Date'" + ] + } + ], + "source": [ + "df_apple.Date" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'], dtype='object')" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_apple.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([12, 12, 12, 12, 12, 12, 12, 12, 12, 12,\n", + " ...\n", + " 6, 6, 6, 6, 6, 7, 7, 7, 7, 7],\n", + " dtype='int64', name='Date', length=8465)" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_apple.index.month" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, False, False, ..., False, False, False])" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_apple.index.is_month_end" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OpenHighLowCloseVolumeAdj Close
Date
1980-12-3134.2534.2534.1334.1389376000.53
1981-03-3124.7524.7524.5024.5039984000.38
1981-04-3028.3828.6228.3828.3831528000.44
1981-06-3026.1326.1326.0026.0089768000.41
1981-07-3125.0025.1225.0025.0027384000.39
.....................
2014-01-31495.18501.53493.55500.6011619930070.69
2014-02-28529.08532.75522.12526.249299220074.76
2014-03-31539.23540.81535.93536.744216730076.25
2014-04-30592.64599.43589.80590.0911416020083.83
2014-06-3092.1093.7392.0992.934948230092.93
\n", + "

283 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " Open High Low Close Volume Adj Close\n", + "Date \n", + "1980-12-31 34.25 34.25 34.13 34.13 8937600 0.53\n", + "1981-03-31 24.75 24.75 24.50 24.50 3998400 0.38\n", + "1981-04-30 28.38 28.62 28.38 28.38 3152800 0.44\n", + "1981-06-30 26.13 26.13 26.00 26.00 8976800 0.41\n", + "1981-07-31 25.00 25.12 25.00 25.00 2738400 0.39\n", + "... ... ... ... ... ... ...\n", + "2014-01-31 495.18 501.53 493.55 500.60 116199300 70.69\n", + "2014-02-28 529.08 532.75 522.12 526.24 92992200 74.76\n", + "2014-03-31 539.23 540.81 535.93 536.74 42167300 76.25\n", + "2014-04-30 592.64 599.43 589.80 590.09 114160200 83.83\n", + "2014-06-30 92.10 93.73 92.09 92.93 49482300 92.93\n", + "\n", + "[283 rows x 6 columns]" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_apple[df_apple.index.is_month_end]" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df_apple)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['1980-12-12', '1980-12-15', '1980-12-16', '1980-12-17',\n", + " '1980-12-18', '1980-12-19', '1980-12-22', '1980-12-23',\n", + " '1980-12-24', '1980-12-26',\n", + " ...\n", + " '2014-06-24', '2014-06-25', '2014-06-26', '2014-06-27',\n", + " '2014-06-30', '2014-07-01', '2014-07-02', '2014-07-03',\n", + " '2014-07-07', '2014-07-08'],\n", + " dtype='datetime64[ns]', name='Date', length=8465, freq=None)" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_apple.index" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OpenHighLowCloseVolumeAdj Close
Date
1980-12-1228.7528.8728.7528.751172584000.45
1980-12-1527.3827.3827.2527.25439712000.42
1980-12-1625.3725.3725.2525.25264320000.39
1980-12-1725.8726.0025.8725.87216104000.40
1980-12-1826.6326.7526.6326.63183624000.41
.....................
2014-07-0193.5294.0793.1393.523817020093.52
2014-07-0293.8794.0693.0993.482842090093.48
2014-07-0393.6794.1093.2094.032289180094.03
2014-07-0794.1495.9994.1095.975630540095.97
2014-07-0896.2796.8093.9295.356513000095.35
\n", + "

8465 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " Open High Low Close Volume Adj Close\n", + "Date \n", + "1980-12-12 28.75 28.87 28.75 28.75 117258400 0.45\n", + "1980-12-15 27.38 27.38 27.25 27.25 43971200 0.42\n", + "1980-12-16 25.37 25.37 25.25 25.25 26432000 0.39\n", + "1980-12-17 25.87 26.00 25.87 25.87 21610400 0.40\n", + "1980-12-18 26.63 26.75 26.63 26.63 18362400 0.41\n", + "... ... ... ... ... ... ...\n", + "2014-07-01 93.52 94.07 93.13 93.52 38170200 93.52\n", + "2014-07-02 93.87 94.06 93.09 93.48 28420900 93.48\n", + "2014-07-03 93.67 94.10 93.20 94.03 22891800 94.03\n", + "2014-07-07 94.14 95.99 94.10 95.97 56305400 95.97\n", + "2014-07-08 96.27 96.80 93.92 95.35 65130000 95.35\n", + "\n", + "[8465 rows x 6 columns]" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_apple" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([31, 31, 31, 31, 31, 31, 31, 31, 31, 31,\n", + " ...\n", + " 30, 30, 30, 30, 31, 31, 31, 31, 31, 31],\n", + " dtype='int64', name='Date', length=8465)" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(df_apple.index + pd.offsets.BMonthEnd(1)).day" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OpenHighLowCloseVolumeAdj Close
Date
1980-1228.7528.8728.7528.751172584000.45
1980-1227.3827.3827.2527.25439712000.42
1980-1225.3725.3725.2525.25264320000.39
1980-1225.8726.0025.8725.87216104000.40
1980-1226.6326.7526.6326.63183624000.41
.....................
2014-0793.5294.0793.1393.523817020093.52
2014-0793.8794.0693.0993.482842090093.48
2014-0793.6794.1093.2094.032289180094.03
2014-0794.1495.9994.1095.975630540095.97
2014-0796.2796.8093.9295.356513000095.35
\n", + "

8465 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " Open High Low Close Volume Adj Close\n", + "Date \n", + "1980-12 28.75 28.87 28.75 28.75 117258400 0.45\n", + "1980-12 27.38 27.38 27.25 27.25 43971200 0.42\n", + "1980-12 25.37 25.37 25.25 25.25 26432000 0.39\n", + "1980-12 25.87 26.00 25.87 25.87 21610400 0.40\n", + "1980-12 26.63 26.75 26.63 26.63 18362400 0.41\n", + "... ... ... ... ... ... ...\n", + "2014-07 93.52 94.07 93.13 93.52 38170200 93.52\n", + "2014-07 93.87 94.06 93.09 93.48 28420900 93.48\n", + "2014-07 93.67 94.10 93.20 94.03 22891800 94.03\n", + "2014-07 94.14 95.99 94.10 95.97 56305400 95.97\n", + "2014-07 96.27 96.80 93.92 95.35 65130000 95.35\n", + "\n", + "[8465 rows x 6 columns]" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_apple.to_period('M')" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_apple.resample('BM')" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OpenHighLowCloseVolumeAdj Close
Date
1980-12-3130.48153830.56769230.44307730.4430772.586252e+070.473077
1981-01-3031.75476231.82666731.65476231.6547627.249867e+060.493810
1981-02-2726.48000026.57210526.40789526.4078954.231832e+060.411053
1981-03-3124.93772725.01681824.83636424.8363647.962691e+060.387727
1981-04-3027.28666727.36809527.22714327.2271436.392000e+060.423333
.....................
2014-03-31533.593333536.453810530.070952533.2142865.954403e+0775.750000
2014-04-30540.081905544.349048536.262381541.0742867.660787e+0776.867143
2014-05-30601.301905606.372857598.332857603.1957146.828177e+0786.058571
2014-06-30222.360000224.084286220.735714222.6580955.745506e+0791.885714
2014-07-3194.29400095.00400093.48800094.4700004.218366e+0794.470000
\n", + "

404 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " Open High Low Close Volume \\\n", + "Date \n", + "1980-12-31 30.481538 30.567692 30.443077 30.443077 2.586252e+07 \n", + "1981-01-30 31.754762 31.826667 31.654762 31.654762 7.249867e+06 \n", + "1981-02-27 26.480000 26.572105 26.407895 26.407895 4.231832e+06 \n", + "1981-03-31 24.937727 25.016818 24.836364 24.836364 7.962691e+06 \n", + "1981-04-30 27.286667 27.368095 27.227143 27.227143 6.392000e+06 \n", + "... ... ... ... ... ... \n", + "2014-03-31 533.593333 536.453810 530.070952 533.214286 5.954403e+07 \n", + "2014-04-30 540.081905 544.349048 536.262381 541.074286 7.660787e+07 \n", + "2014-05-30 601.301905 606.372857 598.332857 603.195714 6.828177e+07 \n", + "2014-06-30 222.360000 224.084286 220.735714 222.658095 5.745506e+07 \n", + "2014-07-31 94.294000 95.004000 93.488000 94.470000 4.218366e+07 \n", + "\n", + " Adj Close \n", + "Date \n", + "1980-12-31 0.473077 \n", + "1981-01-30 0.493810 \n", + "1981-02-27 0.411053 \n", + "1981-03-31 0.387727 \n", + "1981-04-30 0.423333 \n", + "... ... \n", + "2014-03-31 75.750000 \n", + "2014-04-30 76.867143 \n", + "2014-05-30 86.058571 \n", + "2014-06-30 91.885714 \n", + "2014-07-31 94.470000 \n", + "\n", + "[404 rows x 6 columns]" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_apple.resample('BM').mean()" + ] + }, + { + "cell_type": "code", + "metadata": {}, "outputs": [], "source": [] }, @@ -160,14 +1904,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. What is the difference in days between the first day and the oldest" + "### What is the difference in days between the first day and the oldest" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [] @@ -176,14 +1922,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. 
How many months in the data we have?" + "### How many months in the data we have?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [] @@ -192,14 +1940,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. Plot the 'Adj Close' value. Set the size of the figure to 13.5 x 9 inches" + "### Plot the 'Adj Close' value. Set the size of the figure to 13.5 x 9 inches" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [] @@ -213,34 +1963,35 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [] } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 4 } diff --git a/09_Time_Series/Apple_Stock/Solutions.ipynb b/09_Time_Series/Apple_Stock/Solutions.ipynb index b3c39c08f..33e21558a 100644 --- a/09_Time_Series/Apple_Stock/Solutions.ipynb +++ b/09_Time_Series/Apple_Stock/Solutions.ipynb @@ -13,41 +13,21 @@ "source": [ "### Introduction:\n", "\n", - "We are going to use Apple's stock price.\n", - "\n", - "\n", - "### Step 1. 
Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "\n", - "# visualization\n", - "import matplotlib.pyplot as plt\n", - "\n", - "%matplotlib inline" + "We are going to use Apple's stock price." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv)" + "### Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable apple" + "### Assign it to a variable apple" ] }, { @@ -149,7 +129,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Check out the type of the columns" + "### Check out the type of the columns" ] }, { @@ -183,7 +163,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Transform the Date column as a datetime type" + "### Transform the Date column as a datetime type" ] }, { @@ -215,7 +195,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Set the date as the index" + "### Set the date as the index" ] }, { @@ -321,7 +301,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Is there any duplicate dates?" + "### Is there any duplicate dates?" ] }, { @@ -350,7 +330,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Ops...it seems the index is from the most recent date. Make the first entry the oldest date." + "### Ops...it seems the index is from the most recent date. Make the first entry the oldest date." ] }, { @@ -456,7 +436,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. 
Get the last business day of each month" + "### Get the last business day of each month" ] }, { @@ -562,7 +542,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. What is the difference in days between the first day and the oldest" + "### What is the difference in days between the first day and the oldest" ] }, { @@ -589,7 +569,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. How many months in the data we have?" + "### How many months in the data we have?" ] }, { @@ -616,7 +596,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. Plot the 'Adj Close' value. Set the size of the figure to 13.5 x 9 inches" + "### Plot the 'Adj Close' value. Set the size of the figure to 13.5 x 9 inches" ] }, { @@ -648,7 +628,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -657,23 +636,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/09_Time_Series/Getting_Financial_Data/Exercises.ipynb b/09_Time_Series/Getting_Financial_Data/Exercises.ipynb index 2cf792c43..239d592f4 100644 --- a/09_Time_Series/Getting_Financial_Data/Exercises.ipynb +++ b/09_Time_Series/Getting_Financial_Data/Exercises.ipynb @@ -13,29 +13,18 @@ "source": [ "### Introduction:\n", "\n", - "This time you will get data from a website.\n", - "\n", - "\n", - "### Step 1. Import the necessary libraries" + "This time you will get data from a website." 
] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create your time range (start and end variables). The start date should be 01/01/2015 and the end should today (whatever your today is)." + "### Create your time range (start and end variables). The start date should be 01/01/2015 and the end should today (whatever your today is)." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -44,7 +33,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Get an API key for one of the APIs that are supported by Pandas Datareader, preferably for AlphaVantage.\n", + "### Get an API key for one of the APIs that are supported by Pandas Datareader, preferably for AlphaVantage.\n", "\n", "If you do not have an API key for any of the supported APIs, it is easiest to get one for [AlphaVantage](https://www.alphavantage.co/support/#api-key). (Note that the API key is shown directly after the signup. You do *not* receive it via e-mail.)\n", "\n", @@ -55,12 +44,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Use Pandas Datarader to read the daily time series for the Apple stock (ticker symbol AAPL) between 01/01/2015 and today, assign it to df_apple and print it." + "### Use Pandas Datarader to read the daily time series for the Apple stock (ticker symbol AAPL) between 01/01/2015 and today, assign it to df_apple and print it." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -69,12 +57,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. 
Add a new column \"stock\" to the dataframe and add the ticker symbol" + "### Add a new column \"stock\" to the dataframe and add the ticker symbol" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -83,12 +70,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Repeat the two previous steps for a few other stocks, always creating a new dataframe: Tesla, IBM and Microsoft. (Ticker symbols TSLA, IBM and MSFT.)" + "### Repeat the two previous steps for a few other stocks, always creating a new dataframe: Tesla, IBM and Microsoft. (Ticker symbols TSLA, IBM and MSFT.)" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -97,12 +83,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Combine the four separate dataFrames into one combined dataFrame df that holds the information for all four stocks" + "### Combine the four separate dataFrames into one combined dataFrame df that holds the information for all four stocks" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -111,12 +96,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Shift the stock column into the index (making it a multi-level index consisting of the ticker symbol and the date)." + "### Shift the stock column into the index (making it a multi-level index consisting of the ticker symbol and the date)." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -125,12 +109,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Create a dataFrame called vol, with the volume values." + "### Create a dataFrame called vol, with the volume values." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -139,13 +122,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. 
Aggregate the data of volume to weekly.\n", + "### Aggregate the data of volume to weekly.\n", "Hint: Be careful to not sum data from the same week of 2015 and other years." ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -154,12 +136,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Find all the volume traded in the year of 2015" + "### Find all the volume traded in the year of 2015" ] }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [] @@ -167,7 +148,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -181,7 +162,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/09_Time_Series/Getting_Financial_Data/Solutions.ipynb b/09_Time_Series/Getting_Financial_Data/Solutions.ipynb index 9f1693e12..b90accfec 100644 --- a/09_Time_Series/Getting_Financial_Data/Solutions.ipynb +++ b/09_Time_Series/Getting_Financial_Data/Solutions.ipynb @@ -13,34 +13,14 @@ "source": [ "### Introduction:\n", "\n", - "This time you will get data from a website.\n", - "\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "# package to extract data from various Internet sources into a DataFrame\n", - "# make sure you have it installed\n", - "import pandas_datareader.data as web\n", - "\n", - "# package for dates\n", - "import datetime as dt" + "This time you will get data from a website." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Create your time range (start and end variables). 
The start date should be 01/01/2015 and the end should today (whatever your today is)." + "### Create your time range (start and end variables). The start date should be 01/01/2015 and the end should today (whatever your today is)." ] }, { @@ -65,7 +45,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Get an API key for one of the APIs that are supported by Pandas Datareader, preferably for AlphaVantage.\n", + "### Get an API key for one of the APIs that are supported by Pandas Datareader, preferably for AlphaVantage.\n", "\n", "If you do not have an API key for any of the supported APIs, it is easiest to get one for [AlphaVantage](https://www.alphavantage.co/support/#api-key). (Note that the API key is shown directly after the signup. You do *not* receive it via e-mail.)\n", "\n", @@ -76,7 +56,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Use Pandas Datarader to read the daily time series for the Apple stock (ticker symbol AAPL) between 01/01/2015 and today, assign it to df_apple and print it." + "### Use Pandas Datarader to read the daily time series for the Apple stock (ticker symbol AAPL) between 01/01/2015 and today, assign it to df_apple and print it." ] }, { @@ -234,7 +214,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Add a new column \"stock\" to the dataframe and add the ticker symbol" + "### Add a new column \"stock\" to the dataframe and add the ticker symbol" ] }, { @@ -404,7 +384,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Repeat the two previous steps for a few other stocks, always creating a new dataframe: Tesla, IBM and Microsoft. (Ticker symbols TSLA, IBM and MSFT.)" + "### Repeat the two previous steps for a few other stocks, always creating a new dataframe: Tesla, IBM and Microsoft. (Ticker symbols TSLA, IBM and MSFT.)" ] }, { @@ -418,7 +398,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. 
Combine the four separate dataFrames into one combined dataFrame df that holds the information for all four stocks" + "### Combine the four separate dataFrames into one combined dataFrame df that holds the information for all four stocks" ] }, { @@ -588,7 +568,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Shift the stock column into the index (making it a multi-level index consisting of the ticker symbol and the date)." + "### Shift the stock column into the index (making it a multi-level index consisting of the ticker symbol and the date)." ] }, { @@ -768,7 +748,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Create a dataFrame called vol, with the volume values." + "### Create a dataFrame called vol, with the volume values." ] }, { @@ -896,7 +876,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Aggregate the data of volume to weekly.\n", + "### Aggregate the data of volume to weekly.\n", "Hint: Be careful to not sum data from the same week of 2015 and other years." ] }, @@ -1056,7 +1036,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. 
Find all the volume traded in the year of 2015" + "### Find all the volume traded in the year of 2015" ] }, { @@ -1126,7 +1106,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1140,7 +1120,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/09_Time_Series/Investor_Flow_of_Funds_US/Exercises.ipynb b/09_Time_Series/Investor_Flow_of_Funds_US/Exercises.ipynb index 8e0420625..756de880b 100644 --- a/09_Time_Series/Investor_Flow_of_Funds_US/Exercises.ipynb +++ b/09_Time_Series/Investor_Flow_of_Funds_US/Exercises.ipynb @@ -13,37 +13,25 @@ "source": [ "### Introduction:\n", "\n", - "Special thanks to: https://github.com/rgrp for sharing the dataset.\n", - "\n", - "### Step 1. Import the necessary libraries" + "Special thanks to: https://github.com/rgrp for sharing the dataset." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/datasets/investor-flow-of-funds-us/master/data/weekly.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/datasets/investor-flow-of-funds-us/master/data/weekly.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called " + "### Assign it to a variable called " ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -54,12 +42,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. What is the frequency of the dataset?" + "### What is the frequency of the dataset?" 
] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -70,12 +57,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Set the column Date as the index." + "### Set the column Date as the index." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -86,12 +72,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. What is the type of the index?" + "### What is the type of the index?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -102,12 +87,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Set the index to a DatetimeIndex type" + "### Set the index to a DatetimeIndex type" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -118,12 +102,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Change the frequency to monthly, sum the values and assign it to monthly." + "### Change the frequency to monthly, sum the values and assign it to monthly." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -134,12 +117,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. You will notice that it filled the dataFrame with months that don't have any data with NaN. Let's drop these rows." + "### You will notice that it filled the dataFrame with months that don't have any data with NaN. Let's drop these rows." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -150,12 +132,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Good, now we have the monthly data. Now change the frequency to year." + "### Good, now we have the monthly data. Now change the frequency to year." 
] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -171,7 +152,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -181,21 +161,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/09_Time_Series/Investor_Flow_of_Funds_US/Exercises_with_code_and_solutions.ipynb b/09_Time_Series/Investor_Flow_of_Funds_US/Exercises_with_code_and_solutions.ipynb index 85b201bdb..23d154549 100644 --- a/09_Time_Series/Investor_Flow_of_Funds_US/Exercises_with_code_and_solutions.ipynb +++ b/09_Time_Series/Investor_Flow_of_Funds_US/Exercises_with_code_and_solutions.ipynb @@ -15,32 +15,21 @@ "source": [ "### Introduction:\n", "\n", - "Special thanks to: https://github.com/rgrp for sharing the dataset.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" + "Special thanks to: https://github.com/rgrp for sharing the dataset." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/datasets/investor-flow-of-funds-us/master/data/weekly.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/datasets/investor-flow-of-funds-us/master/data/weekly.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. 
Assign it to a variable called " + "### Assign it to a variable called " ] }, { @@ -163,7 +152,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. What is the frequency of the dataset?" + "### What is the frequency of the dataset?" ] }, { @@ -179,7 +168,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Set the column Date as the index." + "### Set the column Date as the index." ] }, { @@ -308,7 +297,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. What is the type of the index?" + "### What is the type of the index?" ] }, { @@ -347,7 +336,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Set the index to a DatetimeIndex type" + "### Set the index to a DatetimeIndex type" ] }, { @@ -375,7 +364,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Change the frequency to monthly, sum the values and assign it to monthly." + "### Change the frequency to monthly, sum the values and assign it to monthly." ] }, { @@ -816,7 +805,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. You will notice that it filled the dataFrame with months that don't have any data with NaN. Let's drop these rows." + "### You will notice that it filled the dataFrame with months that don't have any data with NaN. Let's drop these rows." ] }, { @@ -1049,7 +1038,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Good, now we have the monthly data. Now change the frequency to year." + "### Good, now we have the monthly data. Now change the frequency to year." 
] }, { @@ -1170,7 +1159,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -1180,7 +1168,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1194,20 +1182,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, diff --git a/09_Time_Series/Investor_Flow_of_Funds_US/Solutions.ipynb b/09_Time_Series/Investor_Flow_of_Funds_US/Solutions.ipynb index 318f9d4b4..2613e3b0c 100644 --- a/09_Time_Series/Investor_Flow_of_Funds_US/Solutions.ipynb +++ b/09_Time_Series/Investor_Flow_of_Funds_US/Solutions.ipynb @@ -13,32 +13,21 @@ "source": [ "### Introduction:\n", "\n", - "Special thanks to: https://github.com/rgrp for sharing the dataset.\n", - "\n", - "### Step 1. Import the necessary libraries" + "Special thanks to: https://github.com/rgrp for sharing the dataset." ] }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/datasets/investor-flow-of-funds-us/master/data/weekly.csv). " + "### Import the dataset from this [address](https://raw.githubusercontent.com/datasets/investor-flow-of-funds-us/master/data/weekly.csv). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. 
Assign it to a variable called " + "### Assign it to a variable called " ] }, { @@ -159,7 +148,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. What is the frequency of the dataset?" + "### What is the frequency of the dataset?" ] }, { @@ -177,7 +166,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Set the column Date as the index." + "### Set the column Date as the index." ] }, { @@ -305,7 +294,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. What is the type of the index?" + "### What is the type of the index?" ] }, { @@ -345,7 +334,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Set the index to a DatetimeIndex type" + "### Set the index to a DatetimeIndex type" ] }, { @@ -372,7 +361,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Change the frequency to monthly, sum the values and assign it to monthly." + "### Change the frequency to monthly, sum the values and assign it to monthly." ] }, { @@ -812,7 +801,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. You will notice that it filled the dataFrame with months that don't have any data with NaN. Let's drop these rows." + "### You will notice that it filled the dataFrame with months that don't have any data with NaN. Let's drop these rows." ] }, { @@ -1044,7 +1033,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Good, now we have the monthly data. Now change the frequency to year." + "### Good, now we have the monthly data. Now change the frequency to year." 
] }, { @@ -1164,7 +1153,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -1174,21 +1162,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/10_Deleting/.DS_Store b/10_Deleting/.DS_Store index f37223950..66e8cc170 100644 Binary files a/10_Deleting/.DS_Store and b/10_Deleting/.DS_Store differ diff --git a/10_Deleting/Iris/Exercises.ipynb b/10_Deleting/Iris/Exercises.ipynb index e2e1ad902..d45234181 100644 --- a/10_Deleting/Iris/Exercises.ipynb +++ b/10_Deleting/Iris/Exercises.ipynb @@ -13,37 +13,25 @@ "source": [ "### Introduction:\n", "\n", - "This exercise may seem a little bit strange, but keep doing it.\n", - "\n", - "### Step 1. Import the necessary libraries" + "This exercise may seem a little bit strange, but keep doing it." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data). " + "### Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. 
Assign it to a variable called iris" + "### Assign it to a variable called iris" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -54,7 +42,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Create columns for the dataset" + "### Create columns for the dataset" ] }, { @@ -76,12 +64,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Is there any missing value in the dataframe?" + "### Is there any missing value in the dataframe?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -92,12 +79,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Lets set the values of the rows 10 to 29 of the column 'petal_length' to NaN" + "### Lets set the values of the rows 10 to 29 of the column 'petal_length' to NaN" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -108,12 +94,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Good, now lets substitute the NaN values to 1.0" + "### Good, now lets substitute the NaN values to 1.0" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -124,12 +109,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Now let's delete the column class" + "### Now let's delete the column class" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -140,12 +124,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Set the first 3 rows as NaN" + "### Set the first 3 rows as NaN" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -156,12 +139,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. 
Delete the rows that have NaN" + "### Delete the rows that have NaN" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -172,12 +154,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Reset the index so it begins with 0 again" + "### Reset the index so it begins with 0 again" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -193,7 +174,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -203,21 +183,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/10_Deleting/Iris/Exercises_with_solutions_and_code.ipynb b/10_Deleting/Iris/Exercises_with_solutions_and_code.ipynb index 0414f7fd6..ca92bafb9 100644 --- a/10_Deleting/Iris/Exercises_with_solutions_and_code.ipynb +++ b/10_Deleting/Iris/Exercises_with_solutions_and_code.ipynb @@ -15,33 +15,21 @@ "source": [ "### Introduction:\n", "\n", - "This exercise may seem a little bit strange, but keep doing it.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np" + "This exercise may seem a little bit strange, but keep doing it." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data). 
" + "### Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called iris" + "### Assign it to a variable called iris" ] }, { @@ -134,7 +122,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Create columns for the dataset" + "### Create columns for the dataset" ] }, { @@ -231,7 +219,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Is there any missing value in the dataframe?" + "### Is there any missing value in the dataframe?" ] }, { @@ -264,7 +252,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Lets set the values of the rows 10 to 29 of the column 'petal_length' to NaN" + "### Lets set the values of the rows 10 to 29 of the column 'petal_length' to NaN" ] }, { @@ -490,7 +478,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Good, now lets substitute the NaN values to 1.0" + "### Good, now lets substitute the NaN values to 1.0" ] }, { @@ -1088,7 +1076,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Now let's delete the column class" + "### Now let's delete the column class" ] }, { @@ -1173,7 +1161,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Set the first 3 rows as NaN" + "### Set the first 3 rows as NaN" ] }, { @@ -1258,7 +1246,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Delete the rows that have NaN" + "### Delete the rows that have NaN" ] }, { @@ -1343,7 +1331,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. 
Reset the index so it begins with 0 again" + "### Reset the index so it begins with 0 again" ] }, { @@ -1433,7 +1421,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -1443,7 +1430,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1457,20 +1444,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false + "version": "3.9.7" } }, "nbformat": 4, diff --git a/10_Deleting/Iris/Solutions.ipynb b/10_Deleting/Iris/Solutions.ipynb index bb67ac43a..cd522af7c 100644 --- a/10_Deleting/Iris/Solutions.ipynb +++ b/10_Deleting/Iris/Solutions.ipynb @@ -13,35 +13,21 @@ "source": [ "### Introduction:\n", "\n", - "This exercise may seem a little bit strange, but keep doing it.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np" + "This exercise may seem a little bit strange, but keep doing it." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data). " + "### Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. 
Assign it to a variable called iris" + "### Assign it to a variable called iris" ] }, { @@ -131,7 +117,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Create columns for the dataset" + "### Create columns for the dataset" ] }, { @@ -227,7 +213,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Is there any missing value in the dataframe?" + "### Is there any missing value in the dataframe?" ] }, { @@ -259,7 +245,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Lets set the values of the rows 10 to 29 of the column 'petal_length' to NaN" + "### Lets set the values of the rows 10 to 29 of the column 'petal_length' to NaN" ] }, { @@ -484,7 +470,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Good, now lets substitute the NaN values to 1.0" + "### Good, now lets substitute the NaN values to 1.0" ] }, { @@ -1081,7 +1067,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Now let's delete the column class" + "### Now let's delete the column class" ] }, { @@ -1165,7 +1151,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Set the first 3 rows as NaN" + "### Set the first 3 rows as NaN" ] }, { @@ -1249,7 +1235,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Delete the rows that have NaN" + "### Delete the rows that have NaN" ] }, { @@ -1333,7 +1319,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. 
Reset the index so it begins with 0 again" + "### Reset the index so it begins with 0 again" ] }, { @@ -1422,7 +1408,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -1432,21 +1417,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/10_Deleting/Wine/Exercises.ipynb b/10_Deleting/Wine/Exercises.ipynb index a89c6f6e2..b76c8b8da 100644 --- a/10_Deleting/Wine/Exercises.ipynb +++ b/10_Deleting/Wine/Exercises.ipynb @@ -14,37 +14,25 @@ "### Introduction:\n", "\n", "This exercise is a adaptation from the UCI Wine dataset.\n", - "The only pupose is to practice deleting data with pandas.\n", - "\n", - "### Step 1. Import the necessary libraries" + "The only pupose is to practice deleting data with pandas." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data). " + "### Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called wine" + "### Assign it to a variable called wine" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -55,12 +43,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. 
Delete the first, fourth, seventh, nineth, eleventh, thirteenth and fourteenth columns" + "### Delete the first, fourth, seventh, nineth, eleventh, thirteenth and fourteenth columns" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -71,7 +58,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Assign the columns as below:\n", + "### Assign the columns as below:\n", "\n", "The attributes are (donated by Riccardo Leardi, riclea '@' anchem.unige.it): \n", "1) alcohol \n", @@ -85,7 +72,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -96,12 +82,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Set the values of the first 3 rows from alcohol as NaN" + "### Set the values of the first 3 rows from alcohol as NaN" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -112,12 +97,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Now set the value of the rows 3 and 4 of magnesium as NaN" + "### Now set the value of the rows 3 and 4 of magnesium as NaN" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -128,12 +112,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Fill the value of NaN with the number 10 in alcohol and 100 in magnesium" + "### Fill the value of NaN with the number 10 in alcohol and 100 in magnesium" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -144,12 +127,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Count the number of missing values" + "### Count the number of missing values" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -160,12 +142,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. 
Create an array of 10 random numbers up until 10" + "### Create an array of 10 random numbers up until 10" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -176,12 +157,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Use random numbers you generated as an index and assign NaN value to each of cell." + "### Use random numbers you generated as an index and assign NaN value to each of cell." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -192,12 +172,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. How many missing values do we have?" + "### How many missing values do we have?" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -208,12 +187,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Delete the rows that contain missing values" + "### Delete the rows that contain missing values" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -224,12 +202,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. Print only the non-null values in alcohol" + "### Print only the non-null values in alcohol" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -238,7 +215,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -249,12 +225,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 15. 
Reset the index, so it starts with 0 again" + "### Reset the index, so it starts with 0 again" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": false }, @@ -270,7 +245,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -280,21 +254,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/10_Deleting/Wine/Exercises_code_and_solutions.ipynb b/10_Deleting/Wine/Exercises_code_and_solutions.ipynb index 265a2071a..7901d758d 100644 --- a/10_Deleting/Wine/Exercises_code_and_solutions.ipynb +++ b/10_Deleting/Wine/Exercises_code_and_solutions.ipynb @@ -14,35 +14,21 @@ "### Introduction:\n", "\n", "This exercise is a adaptation from the UCI Wine dataset.\n", - "The only pupose is to practice deleting data with pandas.\n", - "\n", - "### Step 1. Import the necessary libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np" + "The only pupose is to practice deleting data with pandas." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data). " + "### Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. 
Assign it to a variable called wine" + "### Assign it to a variable called wine" ] }, { @@ -198,7 +184,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Delete the first, fourth, seventh, nineth, eleventh, thirteenth and fourteenth columns" + "### Delete the first, fourth, seventh, nineth, eleventh, thirteenth and fourteenth columns" ] }, { @@ -304,7 +290,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Assign the columns as below:\n", + "### Assign the columns as below:\n", "\n", "The attributes are (donated by Riccardo Leardi, riclea '@' anchem.unige.it): \n", "1) alcohol \n", @@ -425,7 +411,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Set the values of the first 3 rows from alcohol as NaN" + "### Set the values of the first 3 rows from alcohol as NaN" ] }, { @@ -537,7 +523,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Now set the value of the rows 3 and 4 of magnesium as NaN" + "### Now set the value of the rows 3 and 4 of magnesium as NaN" ] }, { @@ -649,7 +635,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Fill the value of NaN with the number 10 in alcohol and 100 in magnesium" + "### Fill the value of NaN with the number 10 in alcohol and 100 in magnesium" ] }, { @@ -764,7 +750,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. Count the number of missing values" + "### Count the number of missing values" ] }, { @@ -800,7 +786,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Create an array of 10 random numbers up until 10" + "### Create an array of 10 random numbers up until 10" ] }, { @@ -830,7 +816,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Use random numbers you generated as an index and assign NaN value to each of cell." + "### Use random numbers you generated as an index and assign NaN value to each of cell." 
] }, { @@ -1002,7 +988,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. How many missing values do we have?" + "### How many missing values do we have?" ] }, { @@ -1038,7 +1024,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Delete the rows that contain missing values" + "### Delete the rows that contain missing values" ] }, { @@ -1150,7 +1136,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. Print only the non-null values in alcohol" + "### Print only the non-null values in alcohol" ] }, { @@ -1326,7 +1312,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 15. Reset the index, so it starts with 0 again" + "### Reset the index, so it starts with 0 again" ] }, { @@ -1443,7 +1429,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -1452,23 +1437,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/10_Deleting/Wine/Solutions.ipynb b/10_Deleting/Wine/Solutions.ipynb index d91227bfc..36341ac19 100644 --- a/10_Deleting/Wine/Solutions.ipynb +++ b/10_Deleting/Wine/Solutions.ipynb @@ -14,32 +14,21 @@ "### Introduction:\n", "\n", "This exercise is a adaptation from the UCI Wine dataset.\n", - "The only pupose is to practice deleting data with pandas.\n", - "\n", - "### Step 1. Import the necessary libraries" + "The only pupose is to practice deleting data with pandas." 
] }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data). " + "### Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3. Assign it to a variable called wine" + "### Assign it to a variable called wine" ] }, { @@ -190,7 +179,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4. Delete the first, fourth, seventh, nineth, eleventh, thirteenth and fourteenth columns" + "### Delete the first, fourth, seventh, nineth, eleventh, thirteenth and fourteenth columns" ] }, { @@ -292,7 +281,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5. Assign the columns as below:\n", + "### Assign the columns as below:\n", "\n", "The attributes are (donated by Riccardo Leardi, riclea '@' anchem.unige.it): \n", "1) alcohol \n", @@ -410,7 +399,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Set the values of the first 3 rows from alcohol as NaN" + "### Set the values of the first 3 rows from alcohol as NaN" ] }, { @@ -519,7 +508,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Now set the value of the rows 3 and 4 of magnesium as NaN" + "### Now set the value of the rows 3 and 4 of magnesium as NaN" ] }, { @@ -628,7 +617,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. Fill the value of NaN with the number 10 in alcohol and 100 in magnesium" + "### Fill the value of NaN with the number 10 in alcohol and 100 in magnesium" ] }, { @@ -737,7 +726,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. 
Count the number of missing values" + "### Count the number of missing values" ] }, { @@ -771,7 +760,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. Create an array of 10 random numbers up until 10" + "### Create an array of 10 random numbers up until 10" ] }, { @@ -798,7 +787,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. Use random numbers you generated as an index and assign NaN value to each of cell." + "### Use random numbers you generated as an index and assign NaN value to each of cell." ] }, { @@ -967,7 +956,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. How many missing values do we have?" + "### How many missing values do we have?" ] }, { @@ -1001,7 +990,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Delete the rows that contain missing values" + "### Delete the rows that contain missing values" ] }, { @@ -1110,7 +1099,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 14. Print only the non-null values in alcohol" + "### Print only the non-null values in alcohol" ] }, { @@ -1281,7 +1270,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 15. 
Reset the index, so it starts with 0 again" + "### Reset the index, so it starts with 0 again" ] }, { @@ -1395,7 +1384,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "collapsed": true }, @@ -1404,23 +1392,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/11_Indexing/Exercises.ipynb b/11_Indexing/New/Exercises.ipynb similarity index 96% rename from 11_Indexing/Exercises.ipynb rename to 11_Indexing/New/Exercises.ipynb index 55c3cc69e..89eb697eb 100644 --- a/11_Indexing/Exercises.ipynb +++ b/11_Indexing/New/Exercises.ipynb @@ -278,23 +278,22 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, diff --git a/requirements.txt b/requirements.txt index 45d521309..3ca18467a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -numpy==1.13.1 +numpy==1.21.0 matplotlib==2.0.2 seaborn==0.8.1 pandas==0.23.4 \ No newline at end of file