{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Standard imports, get names from database." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
first_namelast_name
0PENELOPEGUINESS
1NICKWAHLBERG
2EDCHASE
3JENNIFERDAVIS
4JOHNNYLOLLOBRIGIDA
\n", "
" ], "text/plain": [ " first_name last_name\n", "0 PENELOPE GUINESS\n", "1 NICK WAHLBERG\n", "2 ED CHASE\n", "3 JENNIFER DAVIS\n", "4 JOHNNY LOLLOBRIGIDA" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import pymysql\n", "\n", "sakila = pymysql.connect(host=\"localhost\",\n", " user=\"root\",\n", " password=\"\",\n", " db=\"sakila\",\n", " charset=\"utf8mb4\",\n", " cursorclass=pymysql.cursors.DictCursor)\n", "\n", "grades = pd.read_sql(\"select first_name, last_name from actor\", con = sakila)\n", "grades.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Update the index with login ids that look like GT login IDs, generated from names, e.g.. Penelope Guinness's id is pguiness0." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
first_namelast_name
loginId
pguiness0PENELOPEGUINESS
nwahlberg1NICKWAHLBERG
echase2EDCHASE
jdavis3JENNIFERDAVIS
jlollobrigida4JOHNNYLOLLOBRIGIDA
\n", "
" ], "text/plain": [ " first_name last_name\n", "loginId \n", "pguiness0 PENELOPE GUINESS\n", "nwahlberg1 NICK WAHLBERG\n", "echase2 ED CHASE\n", "jdavis3 JENNIFER DAVIS\n", "jlollobrigida4 JOHNNY LOLLOBRIGIDA" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ids = [grades.loc[i]['first_name'].lower()[0] + grades.loc[i]['last_name'].lower() + str(i)\n", " for i in grades.index]\n", "grades.index = ids\n", "grades.index.name = \"loginId\"\n", "grades.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Convert names to capitalized form." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
first_namelast_name
loginId
pguiness0PenelopeGuiness
nwahlberg1NickWahlberg
echase2EdChase
jdavis3JenniferDavis
jlollobrigida4JohnnyLollobrigida
\n", "
" ], "text/plain": [ " first_name last_name\n", "loginId \n", "pguiness0 Penelope Guiness\n", "nwahlberg1 Nick Wahlberg\n", "echase2 Ed Chase\n", "jdavis3 Jennifer Davis\n", "jlollobrigida4 Johnny Lollobrigida" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grades['first_name'] = grades.apply(lambda row: row['first_name'].capitalize(), axis=1)\n", "grades['last_name'] = grades.apply(lambda row: row['last_name'].capitalize(), axis=1)\n", "grades.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Make a single \"name\" column with last_name, first_name format, get rid of old last_name and first_name columns." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
name
loginId
pguiness0Guiness, Penelope
nwahlberg1Wahlberg, Nick
echase2Chase, Ed
jdavis3Davis, Jennifer
jlollobrigida4Lollobrigida, Johnny
\n", "
" ], "text/plain": [ " name\n", "loginId \n", "pguiness0 Guiness, Penelope\n", "nwahlberg1 Wahlberg, Nick\n", "echase2 Chase, Ed\n", "jdavis3 Davis, Jennifer\n", "jlollobrigida4 Lollobrigida, Johnny" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grades['name'] = grades.apply(lambda row: f\"{row['last_name']}, {row['first_name']}\", axis=1)\n", "del(grades['first_name'])\n", "del(grades['last_name'])\n", "grades.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Add some fake assignments and grades, convert values to integers, and save resulting DataFrame to a CSV file." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namehw0hw1hw3quiz1quiz2quiz3quiz4quiz5quiz6exam1exam2exam3final_exam
loginId
pguiness0Guiness, Penelope889210066636459666687789474
nwahlberg1Wahlberg, Nick97919253647061787088839072
echase2Chase, Ed87899383717970637293768469
jdavis3Davis, Jennifer88849866646270647390968383
jlollobrigida4Lollobrigida, Johnny89848276746872628481767867
\n", "
" ], "text/plain": [ " name hw0 hw1 hw3 quiz1 quiz2 quiz3 \\\n", "loginId \n", "pguiness0 Guiness, Penelope 88 92 100 66 63 64 \n", "nwahlberg1 Wahlberg, Nick 97 91 92 53 64 70 \n", "echase2 Chase, Ed 87 89 93 83 71 79 \n", "jdavis3 Davis, Jennifer 88 84 98 66 64 62 \n", "jlollobrigida4 Lollobrigida, Johnny 89 84 82 76 74 68 \n", "\n", " quiz4 quiz5 quiz6 exam1 exam2 exam3 final_exam \n", "loginId \n", "pguiness0 59 66 66 87 78 94 74 \n", "nwahlberg1 61 78 70 88 83 90 72 \n", "echase2 70 63 72 93 76 84 69 \n", "jdavis3 70 64 73 90 96 83 83 \n", "jlollobrigida4 72 62 84 81 76 78 67 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "for hw in [\"hw0\", \"hw1\", \"hw3\"]:\n", " grades[hw] = np.random.normal(90, 5, len(grades))\n", "for quiz in [\"quiz1\", \"quiz2\", \"quiz3\", \"quiz4\", \"quiz5\", \"quiz6\"]:\n", " grades[quiz] = np.random.normal(70, 10, len(grades))\n", "for exam in [\"exam1\", \"exam2\", \"exam3\", \"final_exam\"]:\n", " grades[exam] = np.random.normal(78, 10, len(grades))\n", "# Convert grades to integers\n", "for col in grades.columns[1:]:\n", " grades[col] = grades[col].astype('int', copy=False)\n", "grades.to_csv(\"sakila-grades.csv\")\n", "grades.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }