{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "sipVQMtCP1EU" }, "source": [ "[![AnalyticsDojo](https://github.com/rpi-techfundamentals/spring2019-materials/blob/master/fig/final-logo.png?raw=1)](http://rpi.analyticsdojo.com)\n", "

Basic Text Feature Creation in Python

\n", "

rpi.analyticsdojo.com

" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Basic Text Feature Creation in Python" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 468 }, "colab_type": "code", "id": "k_VzxzKJP9J4", "outputId": "df7a3475-9b02-4f64-ae30-a8fc4f2eb47f" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2019-03-11 14:58:22-- https://raw.githubusercontent.com/rpi-techfundamentals/spring2019-materials/master/input/train.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 61194 (60K) [text/plain]\n", "Saving to: ‘train.csv.1’\n", "\n", "train.csv.1 100%[===================>] 59.76K --.-KB/s in 0.03s \n", "\n", "2019-03-11 14:58:23 (2.32 MB/s) - ‘train.csv.1’ saved [61194/61194]\n", "\n", "--2019-03-11 14:58:23-- https://raw.githubusercontent.com/rpi-techfundamentals/spring2019-materials/master/input/test.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 28629 (28K) [text/plain]\n", "Saving to: ‘test.csv.1’\n", "\n", "test.csv.1 100%[===================>] 27.96K --.-KB/s in 0.01s \n", "\n", "2019-03-11 14:58:24 (2.27 MB/s) - ‘test.csv.1’ saved [28629/28629]\n", "\n" ] } ], "source": [ "!wget https://raw.githubusercontent.com/rpi-techfundamentals/spring2019-materials/master/input/train.csv\n", "!wget https://raw.githubusercontent.com/rpi-techfundamentals/spring2019-materials/master/input/test.csv" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "4VE8Lm6TProo" }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import pandas as pd\n", "\n", "train= pd.read_csv('train.csv')\n", "test = pd.read_csv('test.csv')\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "Xl7KmTDhP75w" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 475 }, "colab_type": "code", "id": "uzvUhUzoProy", "outputId": "a1329bc4-e5ec-4f45-c3ac-af3ac2da6848" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "2 Heikkinen, Miss. Laina female 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "4 Allen, Mr. William Henry male 35.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C \n", "2 0 STON/O2. 3101282 7.9250 NaN S \n", "3 0 113803 53.1000 C123 S \n", "4 0 373450 8.0500 NaN S " ] }, "execution_count": 5, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "#Print to standard output, and see the results in the \"log\" section below after running your script\n", "train.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 324 }, "colab_type": "code", "id": "Q3yOOb59Pro_", "outputId": "cf111e0c-1563-45e2-a657-b80b0f786d70" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassAgeSibSpParchFare
count891.000000891.000000891.000000714.000000891.000000891.000000891.000000
mean446.0000000.3838382.30864229.6991180.5230080.38159432.204208
std257.3538420.4865920.83607114.5264971.1027430.80605749.693429
min1.0000000.0000001.0000000.4200000.0000000.0000000.000000
25%223.5000000.0000002.00000020.1250000.0000000.0000007.910400
50%446.0000000.0000003.00000028.0000000.0000000.00000014.454200
75%668.5000001.0000003.00000038.0000001.0000000.00000031.000000
max891.0000001.0000003.00000080.0000008.0000006.000000512.329200
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass Age SibSp \\\n", "count 891.000000 891.000000 891.000000 714.000000 891.000000 \n", "mean 446.000000 0.383838 2.308642 29.699118 0.523008 \n", "std 257.353842 0.486592 0.836071 14.526497 1.102743 \n", "min 1.000000 0.000000 1.000000 0.420000 0.000000 \n", "25% 223.500000 0.000000 2.000000 20.125000 0.000000 \n", "50% 446.000000 0.000000 3.000000 28.000000 0.000000 \n", "75% 668.500000 1.000000 3.000000 38.000000 1.000000 \n", "max 891.000000 1.000000 3.000000 80.000000 8.000000 \n", "\n", " Parch Fare \n", "count 891.000000 891.000000 \n", "mean 0.381594 32.204208 \n", "std 0.806057 49.693429 \n", "min 0.000000 0.000000 \n", "25% 0.000000 7.910400 \n", "50% 0.000000 14.454200 \n", "75% 0.000000 31.000000 \n", "max 6.000000 512.329200 " ] }, "execution_count": 6, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "#Print to standard output, and see the results in the \"log\" section below after running your script\n", "train.describe()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 267 }, "colab_type": "code", "id": "0kbWnA9hPrpN", "outputId": "0a9980eb-84cd-4ab5-9e1e-6caa0364f44d" }, "outputs": [ { "data": { "text/plain": [ "PassengerId int64\n", "Survived int64\n", "Pclass int64\n", "Name object\n", "Sex object\n", "Age float64\n", "SibSp int64\n", "Parch int64\n", "Ticket object\n", "Fare float64\n", "Cabin object\n", "Embarked object\n", "dtype: object" ] }, "execution_count": 7, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "train.dtypes" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1108 }, "colab_type": "code", "id": "WBDI4kNuPrpX", "outputId": "06acd3da-e37b-49f8-8513-cc7700f494c2" }, "outputs": [ { "data": { "text/plain": [ "0 22.0\n", "1 38.0\n", "2 26.0\n", "3 35.0\n", "4 35.0\n", "5 NaN\n", "6 54.0\n", "7 2.0\n", "8 27.0\n", "9 14.0\n", "10 4.0\n", "11 58.0\n", "12 20.0\n", "13 39.0\n", "14 14.0\n", "15 55.0\n", "16 2.0\n", "17 NaN\n", "18 31.0\n", "19 NaN\n", "20 35.0\n", "21 34.0\n", "22 15.0\n", "23 28.0\n", "24 8.0\n", "25 38.0\n", "26 NaN\n", "27 19.0\n", "28 NaN\n", "29 NaN\n", " ... \n", "861 21.0\n", "862 48.0\n", "863 NaN\n", "864 24.0\n", "865 42.0\n", "866 27.0\n", "867 31.0\n", "868 NaN\n", "869 4.0\n", "870 26.0\n", "871 47.0\n", "872 33.0\n", "873 47.0\n", "874 28.0\n", "875 15.0\n", "876 20.0\n", "877 19.0\n", "878 NaN\n", "879 56.0\n", "880 25.0\n", "881 33.0\n", "882 22.0\n", "883 28.0\n", "884 25.0\n", "885 39.0\n", "886 27.0\n", "887 19.0\n", "888 NaN\n", "889 26.0\n", "890 32.0\n", "Name: Age, Length: 891, dtype: float64" ] }, "execution_count": 8, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "#Let's look at the age field. We can see \"NaN\" (which indicates missing values).s\n", "train[\"Age\"]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1126 }, "colab_type": "code", "id": "ZK8wYH0yPrpl", "outputId": "1e0892f6-3667-4e2e-ad64-92910ead19bf" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The Median age is: 28.0 years old.\n" ] }, { "data": { "text/plain": [ "0 22.0\n", "1 38.0\n", "2 26.0\n", "3 35.0\n", "4 35.0\n", "5 28.0\n", "6 54.0\n", "7 2.0\n", "8 27.0\n", "9 14.0\n", "10 4.0\n", "11 58.0\n", "12 20.0\n", "13 39.0\n", "14 14.0\n", "15 55.0\n", "16 2.0\n", "17 28.0\n", "18 31.0\n", "19 28.0\n", "20 35.0\n", "21 34.0\n", "22 15.0\n", "23 28.0\n", "24 8.0\n", "25 38.0\n", "26 28.0\n", "27 19.0\n", "28 28.0\n", "29 28.0\n", " ... \n", "861 21.0\n", "862 48.0\n", "863 28.0\n", "864 24.0\n", "865 42.0\n", "866 27.0\n", "867 31.0\n", "868 28.0\n", "869 4.0\n", "870 26.0\n", "871 47.0\n", "872 33.0\n", "873 47.0\n", "874 28.0\n", "875 15.0\n", "876 20.0\n", "877 19.0\n", "878 28.0\n", "879 56.0\n", "880 25.0\n", "881 33.0\n", "882 22.0\n", "883 28.0\n", "884 25.0\n", "885 39.0\n", "886 27.0\n", "887 19.0\n", "888 28.0\n", "889 26.0\n", "890 32.0\n", "Name: Age, Length: 891, dtype: float64" ] }, "execution_count": 9, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "#Now let's recode. \n", "medianAge=train[\"Age\"].median()\n", "print (\"The Median age is:\", medianAge, \" years old.\")\n", "train[\"Age\"] = train[\"Age\"].fillna(medianAge)\n", "\n", "#Option 2 all in one shot! \n", "train[\"Age\"] = train[\"Age\"].fillna(train[\"Age\"].median())\n", "train[\"Age\"] " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "m9okvlm_Prps" }, "outputs": [], "source": [ "#For Recoding Data, we can use what we know of selecting rows and columns\n", "train[\"Embarked\"] = train[\"Embarked\"].fillna(\"S\")\n", "train.loc[train[\"Embarked\"] == \"S\", \"EmbarkedRecode\"] = 0\n", "train.loc[train[\"Embarked\"] == \"C\", \"EmbarkedRecode\"] = 1\n", "train.loc[train[\"Embarked\"] == \"Q\", \"EmbarkedRecode\"] = 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "pyMwfV6dPrpx" }, "outputs": [], "source": [ "# We can also use something called a lambda function \n", "# You can read more about the lambda function here.\n", "#http://www.python-course.eu/lambda.php \n", "gender_fn = lambda x: 0 if x == 'male' else 1\n", "train['Gender'] = train['Sex'].map(gender_fn)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 4280 }, "colab_type": "code", "id": "Y5n8mVPOPrp1", "outputId": "49041ba7-70d3-4cea-e1d3-b47fa87e971f" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedEmbarkedRecodeGenderNameLengthAge2
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS0.0023484.0
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C1.01511444.0
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS0.0122676.0
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S0.01441225.0
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS0.00241225.0
5603Moran, Mr. Jamesmale28.0003308778.4583NaNQ2.0016784.0
6701McCarthy, Mr. Timothy Jmale54.0001746351.8625E46S0.00232916.0
7803Palsson, Master. Gosta Leonardmale2.03134990921.0750NaNS0.00304.0
8913Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)female27.00234774211.1333NaNS0.0149729.0
91012Nasser, Mrs. Nicholas (Adele Achem)female14.01023773630.0708NaNC1.0135196.0
101113Sandstrom, Miss. Marguerite Rutfemale4.011PP 954916.7000G6S0.013116.0
111211Bonnell, Miss. Elizabethfemale58.00011378326.5500C103S0.01243364.0
121303Saundercock, Mr. William Henrymale20.000A/5. 21518.0500NaNS0.0030400.0
131403Andersson, Mr. Anders Johanmale39.01534708231.2750NaNS0.00271521.0
141503Vestrom, Miss. Hulda Amanda Adolfinafemale14.0003504067.8542NaNS0.0136196.0
151612Hewlett, Mrs. (Mary D Kingcome)female55.00024870616.0000NaNS0.01323025.0
161703Rice, Master. Eugenemale2.04138265229.1250NaNQ2.00204.0
171812Williams, Mr. Charles Eugenemale28.00024437313.0000NaNS0.0028784.0
181903Vander Planke, Mrs. Julius (Emelia Maria Vande...female31.01034576318.0000NaNS0.0155961.0
192013Masselmani, Mrs. Fatimafemale28.00026497.2250NaNC1.0123784.0
202102Fynney, Mr. Joseph Jmale35.00023986526.0000NaNS0.00201225.0
212212Beesley, Mr. Lawrencemale34.00024869813.0000D56S0.00211156.0
222313McGowan, Miss. Anna \"Annie\"female15.0003309238.0292NaNQ2.0127225.0
232411Sloper, Mr. William Thompsonmale28.00011378835.5000A6S0.0028784.0
242503Palsson, Miss. Torborg Danirafemale8.03134990921.0750NaNS0.012964.0
252613Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...female38.01534707731.3875NaNS0.01571444.0
262703Emir, Mr. Farred Chehabmale28.00026317.2250NaNC1.0023784.0
272801Fortune, Mr. Charles Alexandermale19.03219950263.0000C23 C25 C27S0.0030361.0
282913O'Dwyer, Miss. Ellen \"Nellie\"female28.0003309597.8792NaNQ2.0129784.0
293003Todoroff, Mr. Laliomale28.0003492167.8958NaNS0.0019784.0
...................................................
86186202Giles, Mr. Frederick Edwardmale21.0102813411.5000NaNS0.0027441.0
86286311Swift, Mrs. Frederick Joel (Margaret Welles Ba...female48.0001746625.9292D17S0.01512304.0
86386403Sage, Miss. Dorothy Edith \"Dolly\"female28.082CA. 234369.5500NaNS0.0133784.0
86486502Gill, Mr. John Williammale24.00023386613.0000NaNS0.0022576.0
86586612Bystrom, Mrs. (Karolina)female42.00023685213.0000NaNS0.01241764.0
86686712Duran y More, Miss. Asuncionfemale27.010SC/PARIS 214913.8583NaNC1.0128729.0
86786801Roebling, Mr. Washington Augustus IImale31.000PC 1759050.4958A24S0.0036961.0
86886903van Melkebeke, Mr. Philemonmale28.0003457779.5000NaNS0.0027784.0
86987013Johnson, Master. Harold Theodormale4.01134774211.1333NaNS0.003116.0
87087103Balkic, Mr. Cerinmale26.0003492487.8958NaNS0.0017676.0
87187211Beckwith, Mrs. Richard Leonard (Sallie Monypeny)female47.0111175152.5542D35S0.01482209.0
87287301Carlsson, Mr. Frans Olofmale33.0006955.0000B51 B53 B55S0.00241089.0
87387403Vander Cruyssen, Mr. Victormale47.0003457659.0000NaNS0.00272209.0
87487512Abelson, Mrs. Samuel (Hannah Wizosky)female28.010P/PP 338124.0000NaNC1.0137784.0
87587613Najib, Miss. Adele Kiamie \"Jane\"female15.00026677.2250NaNC1.0132225.0
87687703Gustafsson, Mr. Alfred Ossianmale20.00075349.8458NaNS0.0029400.0
87787803Petroff, Mr. Nedeliomale19.0003492127.8958NaNS0.0020361.0
87887903Laleff, Mr. Kristomale28.0003492177.8958NaNS0.0018784.0
87988011Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)female56.0011176783.1583C50C1.01453136.0
88088112Shelley, Mrs. William (Imanita Parrish Hall)female25.00123043326.0000NaNS0.0144625.0
88188203Markun, Mr. Johannmale33.0003492577.8958NaNS0.00181089.0
88288303Dahlberg, Miss. Gerda Ulrikafemale22.000755210.5167NaNS0.0128484.0
88388402Banfield, Mr. Frederick Jamesmale28.000C.A./SOTON 3406810.5000NaNS0.0029784.0
88488503Sutehall, Mr. Henry Jrmale25.000SOTON/OQ 3920767.0500NaNS0.0022625.0
88588603Rice, Mrs. William (Margaret Norton)female39.00538265229.1250NaNQ2.01361521.0
88688702Montvila, Rev. Juozasmale27.00021153613.0000NaNS0.0021729.0
88788811Graham, Miss. Margaret Edithfemale19.00011205330.0000B42S0.0128361.0
88888903Johnston, Miss. Catherine Helen \"Carrie\"female28.012W./C. 660723.4500NaNS0.0140784.0
88989011Behr, Mr. Karl Howellmale26.00011136930.0000C148C1.0021676.0
89089103Dooley, Mr. Patrickmale32.0003703767.7500NaNQ2.00191024.0
\n", "

891 rows × 16 columns

\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", "5 6 0 3 \n", "6 7 0 1 \n", "7 8 0 3 \n", "8 9 1 3 \n", "9 10 1 2 \n", "10 11 1 3 \n", "11 12 1 1 \n", "12 13 0 3 \n", "13 14 0 3 \n", "14 15 0 3 \n", "15 16 1 2 \n", "16 17 0 3 \n", "17 18 1 2 \n", "18 19 0 3 \n", "19 20 1 3 \n", "20 21 0 2 \n", "21 22 1 2 \n", "22 23 1 3 \n", "23 24 1 1 \n", "24 25 0 3 \n", "25 26 1 3 \n", "26 27 0 3 \n", "27 28 0 1 \n", "28 29 1 3 \n", "29 30 0 3 \n", ".. ... ... ... \n", "861 862 0 2 \n", "862 863 1 1 \n", "863 864 0 3 \n", "864 865 0 2 \n", "865 866 1 2 \n", "866 867 1 2 \n", "867 868 0 1 \n", "868 869 0 3 \n", "869 870 1 3 \n", "870 871 0 3 \n", "871 872 1 1 \n", "872 873 0 1 \n", "873 874 0 3 \n", "874 875 1 2 \n", "875 876 1 3 \n", "876 877 0 3 \n", "877 878 0 3 \n", "878 879 0 3 \n", "879 880 1 1 \n", "880 881 1 2 \n", "881 882 0 3 \n", "882 883 0 3 \n", "883 884 0 2 \n", "884 885 0 3 \n", "885 886 0 3 \n", "886 887 0 2 \n", "887 888 1 1 \n", "888 889 0 3 \n", "889 890 1 1 \n", "890 891 0 3 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "2 Heikkinen, Miss. Laina female 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "4 Allen, Mr. William Henry male 35.0 0 \n", "5 Moran, Mr. James male 28.0 0 \n", "6 McCarthy, Mr. Timothy J male 54.0 0 \n", "7 Palsson, Master. Gosta Leonard male 2.0 3 \n", "8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 \n", "9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 \n", "10 Sandstrom, Miss. Marguerite Rut female 4.0 1 \n", "11 Bonnell, Miss. Elizabeth female 58.0 0 \n", "12 Saundercock, Mr. William Henry male 20.0 0 \n", "13 Andersson, Mr. Anders Johan male 39.0 1 \n", "14 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 \n", "15 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 \n", "16 Rice, Master. Eugene male 2.0 4 \n", "17 Williams, Mr. Charles Eugene male 28.0 0 \n", "18 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 \n", "19 Masselmani, Mrs. Fatima female 28.0 0 \n", "20 Fynney, Mr. Joseph J male 35.0 0 \n", "21 Beesley, Mr. Lawrence male 34.0 0 \n", "22 McGowan, Miss. Anna \"Annie\" female 15.0 0 \n", "23 Sloper, Mr. William Thompson male 28.0 0 \n", "24 Palsson, Miss. Torborg Danira female 8.0 3 \n", "25 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 \n", "26 Emir, Mr. Farred Chehab male 28.0 0 \n", "27 Fortune, Mr. Charles Alexander male 19.0 3 \n", "28 O'Dwyer, Miss. Ellen \"Nellie\" female 28.0 0 \n", "29 Todoroff, Mr. Lalio male 28.0 0 \n", ".. ... ... ... ... \n", "861 Giles, Mr. Frederick Edward male 21.0 1 \n", "862 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.0 0 \n", "863 Sage, Miss. Dorothy Edith \"Dolly\" female 28.0 8 \n", "864 Gill, Mr. John William male 24.0 0 \n", "865 Bystrom, Mrs. (Karolina) female 42.0 0 \n", "866 Duran y More, Miss. Asuncion female 27.0 1 \n", "867 Roebling, Mr. Washington Augustus II male 31.0 0 \n", "868 van Melkebeke, Mr. Philemon male 28.0 0 \n", "869 Johnson, Master. Harold Theodor male 4.0 1 \n", "870 Balkic, Mr. Cerin male 26.0 0 \n", "871 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 \n", "872 Carlsson, Mr. Frans Olof male 33.0 0 \n", "873 Vander Cruyssen, Mr. Victor male 47.0 0 \n", "874 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 \n", "875 Najib, Miss. Adele Kiamie \"Jane\" female 15.0 0 \n", "876 Gustafsson, Mr. Alfred Ossian male 20.0 0 \n", "877 Petroff, Mr. Nedelio male 19.0 0 \n", "878 Laleff, Mr. Kristo male 28.0 0 \n", "879 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.0 0 \n", "880 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0 \n", "881 Markun, Mr. Johann male 33.0 0 \n", "882 Dahlberg, Miss. Gerda Ulrika female 22.0 0 \n", "883 Banfield, Mr. Frederick James male 28.0 0 \n", "884 Sutehall, Mr. Henry Jr male 25.0 0 \n", "885 Rice, Mrs. William (Margaret Norton) female 39.0 0 \n", "886 Montvila, Rev. Juozas male 27.0 0 \n", "887 Graham, Miss. Margaret Edith female 19.0 0 \n", "888 Johnston, Miss. Catherine Helen \"Carrie\" female 28.0 1 \n", "889 Behr, Mr. Karl Howell male 26.0 0 \n", "890 Dooley, Mr. Patrick male 32.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked EmbarkedRecode \\\n", "0 0 A/5 21171 7.2500 NaN S 0.0 \n", "1 0 PC 17599 71.2833 C85 C 1.0 \n", "2 0 STON/O2. 3101282 7.9250 NaN S 0.0 \n", "3 0 113803 53.1000 C123 S 0.0 \n", "4 0 373450 8.0500 NaN S 0.0 \n", "5 0 330877 8.4583 NaN Q 2.0 \n", "6 0 17463 51.8625 E46 S 0.0 \n", "7 1 349909 21.0750 NaN S 0.0 \n", "8 2 347742 11.1333 NaN S 0.0 \n", "9 0 237736 30.0708 NaN C 1.0 \n", "10 1 PP 9549 16.7000 G6 S 0.0 \n", "11 0 113783 26.5500 C103 S 0.0 \n", "12 0 A/5. 2151 8.0500 NaN S 0.0 \n", "13 5 347082 31.2750 NaN S 0.0 \n", "14 0 350406 7.8542 NaN S 0.0 \n", "15 0 248706 16.0000 NaN S 0.0 \n", "16 1 382652 29.1250 NaN Q 2.0 \n", "17 0 244373 13.0000 NaN S 0.0 \n", "18 0 345763 18.0000 NaN S 0.0 \n", "19 0 2649 7.2250 NaN C 1.0 \n", "20 0 239865 26.0000 NaN S 0.0 \n", "21 0 248698 13.0000 D56 S 0.0 \n", "22 0 330923 8.0292 NaN Q 2.0 \n", "23 0 113788 35.5000 A6 S 0.0 \n", "24 1 349909 21.0750 NaN S 0.0 \n", "25 5 347077 31.3875 NaN S 0.0 \n", "26 0 2631 7.2250 NaN C 1.0 \n", "27 2 19950 263.0000 C23 C25 C27 S 0.0 \n", "28 0 330959 7.8792 NaN Q 2.0 \n", "29 0 349216 7.8958 NaN S 0.0 \n", ".. ... ... ... ... ... ... \n", "861 0 28134 11.5000 NaN S 0.0 \n", "862 0 17466 25.9292 D17 S 0.0 \n", "863 2 CA. 2343 69.5500 NaN S 0.0 \n", "864 0 233866 13.0000 NaN S 0.0 \n", "865 0 236852 13.0000 NaN S 0.0 \n", "866 0 SC/PARIS 2149 13.8583 NaN C 1.0 \n", "867 0 PC 17590 50.4958 A24 S 0.0 \n", "868 0 345777 9.5000 NaN S 0.0 \n", "869 1 347742 11.1333 NaN S 0.0 \n", "870 0 349248 7.8958 NaN S 0.0 \n", "871 1 11751 52.5542 D35 S 0.0 \n", "872 0 695 5.0000 B51 B53 B55 S 0.0 \n", "873 0 345765 9.0000 NaN S 0.0 \n", "874 0 P/PP 3381 24.0000 NaN C 1.0 \n", "875 0 2667 7.2250 NaN C 1.0 \n", "876 0 7534 9.8458 NaN S 0.0 \n", "877 0 349212 7.8958 NaN S 0.0 \n", "878 0 349217 7.8958 NaN S 0.0 \n", "879 1 11767 83.1583 C50 C 1.0 \n", "880 1 230433 26.0000 NaN S 0.0 \n", "881 0 349257 7.8958 NaN S 0.0 \n", "882 0 7552 10.5167 NaN S 0.0 \n", "883 0 C.A./SOTON 34068 10.5000 NaN S 0.0 \n", "884 0 SOTON/OQ 392076 7.0500 NaN S 0.0 \n", "885 5 382652 29.1250 NaN Q 2.0 \n", "886 0 211536 13.0000 NaN S 0.0 \n", "887 0 112053 30.0000 B42 S 0.0 \n", "888 2 W./C. 6607 23.4500 NaN S 0.0 \n", "889 0 111369 30.0000 C148 C 1.0 \n", "890 0 370376 7.7500 NaN Q 2.0 \n", "\n", " Gender NameLength Age2 \n", "0 0 23 484.0 \n", "1 1 51 1444.0 \n", "2 1 22 676.0 \n", "3 1 44 1225.0 \n", "4 0 24 1225.0 \n", "5 0 16 784.0 \n", "6 0 23 2916.0 \n", "7 0 30 4.0 \n", "8 1 49 729.0 \n", "9 1 35 196.0 \n", "10 1 31 16.0 \n", "11 1 24 3364.0 \n", "12 0 30 400.0 \n", "13 0 27 1521.0 \n", "14 1 36 196.0 \n", "15 1 32 3025.0 \n", "16 0 20 4.0 \n", "17 0 28 784.0 \n", "18 1 55 961.0 \n", "19 1 23 784.0 \n", "20 0 20 1225.0 \n", "21 0 21 1156.0 \n", "22 1 27 225.0 \n", "23 0 28 784.0 \n", "24 1 29 64.0 \n", "25 1 57 1444.0 \n", "26 0 23 784.0 \n", "27 0 30 361.0 \n", "28 1 29 784.0 \n", "29 0 19 784.0 \n", ".. ... ... ... \n", "861 0 27 441.0 \n", "862 1 51 2304.0 \n", "863 1 33 784.0 \n", "864 0 22 576.0 \n", "865 1 24 1764.0 \n", "866 1 28 729.0 \n", "867 0 36 961.0 \n", "868 0 27 784.0 \n", "869 0 31 16.0 \n", "870 0 17 676.0 \n", "871 1 48 2209.0 \n", "872 0 24 1089.0 \n", "873 0 27 2209.0 \n", "874 1 37 784.0 \n", "875 1 32 225.0 \n", "876 0 29 400.0 \n", "877 0 20 361.0 \n", "878 0 18 784.0 \n", "879 1 45 3136.0 \n", "880 1 44 625.0 \n", "881 0 18 1089.0 \n", "882 1 28 484.0 \n", "883 0 29 784.0 \n", "884 0 22 625.0 \n", "885 1 36 1521.0 \n", "886 0 21 729.0 \n", "887 1 28 361.0 \n", "888 1 40 784.0 \n", "889 0 21 676.0 \n", "890 0 19 1024.0 \n", "\n", "[891 rows x 16 columns]" ] }, "execution_count": 12, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "#or we can do in one shot\n", "train['NameLength'] = train['Name'].map(lambda x: len(x))\n", "train['Age2'] = train['Age'].map(lambda x: x*x)\n", "train" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "G6ZCxH_mPrp5" }, "outputs": [], "source": [ "\n", "#We can start to create little small functions that will find a string.\n", "def has_title(name):\n", " for s in ['Mr.', 'Mrs.', 'Miss.', 'Dr.', 'Sir.']:\n", " if name.find(s) >= 0:\n", " return True\n", " return False\n", "\n", "#Now we are using that separate function in another function. \n", "title_fn = lambda x: 1 if has_title(x) else 0\n", "#Finally, we call the function for name\n", "train['Title'] = train['Name'].map(title_fn)\n", "test['Title']= train['Name'].map(title_fn)\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 4174 }, "colab_type": "code", "id": "HiHaSvJpPrp8", "outputId": "838f7447-2df5-401d-cc4c-62a44668078c" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedTitle
08923Kelly, Mr. Jamesmale34.5003309117.8292NaNQ1
18933Wilkes, Mrs. James (Ellen Needs)female47.0103632727.0000NaNS1
28942Myles, Mr. Thomas Francismale62.0002402769.6875NaNQ1
38953Wirz, Mr. Albertmale27.0003151548.6625NaNS1
48963Hirvonen, Mrs. Alexander (Helga E Lindqvist)female22.011310129812.2875NaNS1
58973Svensson, Mr. Johan Cervinmale14.00075389.2250NaNS1
68983Connolly, Miss. Katefemale30.0003309727.6292NaNQ1
78992Caldwell, Mr. Albert Francismale26.01124873829.0000NaNS0
89003Abrahim, Mrs. Joseph (Sophie Halaut Easu)female18.00026577.2292NaNC1
99013Davies, Mr. John Samuelmale21.020A/4 4887124.1500NaNS1
109023Ilieff, Mr. YliomaleNaN003492207.8958NaNS1
119031Jones, Mr. Charles Cressonmale46.00069426.0000NaNS1
129041Snyder, Mrs. John Pillsbury (Nelle Stevenson)female23.0102122882.2667B45S1
139052Howard, Mr. Benjaminmale63.0102406526.0000NaNS1
149061Chaffee, Mrs. Herbert Fuller (Carrie Constance...female47.010W.E.P. 573461.1750E31S1
159072del Carlo, Mrs. Sebastiano (Argenia Genovesi)female24.010SC/PARIS 216727.7208NaNC1
169082Keane, Mr. Danielmale35.00023373412.3500NaNQ0
179093Assaf, Mr. Geriosmale21.00026927.2250NaNC1
189103Ilmakangas, Miss. Ida Livijafemale27.010STON/O2. 31012707.9250NaNS1
199113Assaf Khalil, Mrs. Mariana (Miriam\")\"female45.00026967.2250NaNC1
209121Rothschild, Mr. Martinmale55.010PC 1760359.4000NaNC1
219133Olsen, Master. Artur Karlmale9.001C 173683.1708NaNS1
229141Flegenheim, Mrs. Alfred (Antoinette)femaleNaN00PC 1759831.6833NaNS1
239151Williams, Mr. Richard Norris IImale21.001PC 1759761.3792NaNC1
249161Ryerson, Mrs. Arthur Larned (Emily Maria Borie)female48.013PC 17608262.3750B57 B59 B63 B66C1
259173Robins, Mr. Alexander Amale50.010A/5. 333714.5000NaNS1
269181Ostby, Miss. Helene Ragnhildfemale22.00111350961.9792B36C1
279193Daher, Mr. Shedidmale22.50026987.2250NaNC1
289201Brady, Mr. John Bertrammale41.00011305430.5000A21S1
299213Samaan, Mr. EliasmaleNaN20266221.6792NaNC1
.......................................
38812803Canavan, Mr. Patrickmale21.0003648587.7500NaNQ1
38912813Palsson, Master. Paul Folkemale6.03134990921.0750NaNS1
39012821Payne, Mr. Vivian Ponsonbymale23.0001274993.5000B24S1
39112831Lines, Mrs. Ernest H (Elizabeth Lindsey James)female51.001PC 1759239.4000D28S1
39212843Abbott, Master. Eugene Josephmale13.002C.A. 267320.2500NaNS1
39312852Gilbert, Mr. Williammale47.000C.A. 3076910.5000NaNS1
39412863Kink-Heilmann, Mr. Antonmale29.03131515322.0250NaNS1
39512871Smith, Mrs. Lucien Philip (Mary Eloise Hughes)female18.0101369560.0000C31S1
39612883Colbert, Mr. Patrickmale24.0003711097.2500NaNQ1
39712891Frolicher-Stehli, Mrs. Maxmillian (Margaretha ...female48.0111356779.2000B41C1
39812903Larsson-Rondberg, Mr. Edvard Amale22.0003470657.7750NaNS1
39912913Conlon, Mr. Thomas Henrymale31.000213327.7333NaNQ1
40012921Bonnell, Miss. Carolinefemale30.00036928164.8667C7S1
40112932Gale, Mr. Harrymale38.0102866421.0000NaNS1
40212941Gibson, Miss. Dorothy Winifredfemale22.00111237859.4000NaNC1
40312951Carrau, Mr. Jose Pedromale17.00011305947.1000NaNS1
40412961Frauenthal, Mr. Isaac Geraldmale43.0101776527.7208D40C1
40512972Nourney, Mr. Alfred (Baron von Drachstedt\")\"male20.000SC/PARIS 216613.8625D38C1
40612982Ware, Mr. William Jefferymale23.0102866610.5000NaNS1
40712991Widener, Mr. George Duntonmale50.011113503211.5000C80C0
40813003Riordan, Miss. Johanna Hannah\"\"femaleNaN003349157.7208NaNQ1
40913013Peacock, Miss. Treasteallfemale3.011SOTON/O.Q. 310131513.7750NaNS1
41013023Naughton, Miss. HannahfemaleNaN003652377.7500NaNQ1
41113031Minahan, Mrs. William Edward (Lillian E Thorpe)female37.0101992890.0000C78Q1
41213043Henriksson, Miss. Jenny Lovisafemale28.0003470867.7750NaNS1
41313053Spector, Mr. WoolfmaleNaN00A.5. 32368.0500NaNS1
41413061Oliva y Ocana, Dona. Ferminafemale39.000PC 17758108.9000C105C1
41513073Saether, Mr. Simon Sivertsenmale38.500SOTON/O.Q. 31012627.2500NaNS1
41613083Ware, Mr. FrederickmaleNaN003593098.0500NaNS1
41713093Peter, Master. Michael JmaleNaN11266822.3583NaNC1
\n", "

418 rows × 12 columns

\n", "
" ], "text/plain": [ " PassengerId Pclass Name \\\n", "0 892 3 Kelly, Mr. James \n", "1 893 3 Wilkes, Mrs. James (Ellen Needs) \n", "2 894 2 Myles, Mr. Thomas Francis \n", "3 895 3 Wirz, Mr. Albert \n", "4 896 3 Hirvonen, Mrs. Alexander (Helga E Lindqvist) \n", "5 897 3 Svensson, Mr. Johan Cervin \n", "6 898 3 Connolly, Miss. Kate \n", "7 899 2 Caldwell, Mr. Albert Francis \n", "8 900 3 Abrahim, Mrs. Joseph (Sophie Halaut Easu) \n", "9 901 3 Davies, Mr. John Samuel \n", "10 902 3 Ilieff, Mr. Ylio \n", "11 903 1 Jones, Mr. Charles Cresson \n", "12 904 1 Snyder, Mrs. John Pillsbury (Nelle Stevenson) \n", "13 905 2 Howard, Mr. Benjamin \n", "14 906 1 Chaffee, Mrs. Herbert Fuller (Carrie Constance... \n", "15 907 2 del Carlo, Mrs. Sebastiano (Argenia Genovesi) \n", "16 908 2 Keane, Mr. Daniel \n", "17 909 3 Assaf, Mr. Gerios \n", "18 910 3 Ilmakangas, Miss. Ida Livija \n", "19 911 3 Assaf Khalil, Mrs. Mariana (Miriam\")\" \n", "20 912 1 Rothschild, Mr. Martin \n", "21 913 3 Olsen, Master. Artur Karl \n", "22 914 1 Flegenheim, Mrs. Alfred (Antoinette) \n", "23 915 1 Williams, Mr. Richard Norris II \n", "24 916 1 Ryerson, Mrs. Arthur Larned (Emily Maria Borie) \n", "25 917 3 Robins, Mr. Alexander A \n", "26 918 1 Ostby, Miss. Helene Ragnhild \n", "27 919 3 Daher, Mr. Shedid \n", "28 920 1 Brady, Mr. John Bertram \n", "29 921 3 Samaan, Mr. Elias \n", ".. ... ... ... \n", "388 1280 3 Canavan, Mr. Patrick \n", "389 1281 3 Palsson, Master. Paul Folke \n", "390 1282 1 Payne, Mr. Vivian Ponsonby \n", "391 1283 1 Lines, Mrs. Ernest H (Elizabeth Lindsey James) \n", "392 1284 3 Abbott, Master. Eugene Joseph \n", "393 1285 2 Gilbert, Mr. William \n", "394 1286 3 Kink-Heilmann, Mr. Anton \n", "395 1287 1 Smith, Mrs. Lucien Philip (Mary Eloise Hughes) \n", "396 1288 3 Colbert, Mr. Patrick \n", "397 1289 1 Frolicher-Stehli, Mrs. Maxmillian (Margaretha ... \n", "398 1290 3 Larsson-Rondberg, Mr. Edvard A \n", "399 1291 3 Conlon, Mr. Thomas Henry \n", "400 1292 1 Bonnell, Miss. Caroline \n", "401 1293 2 Gale, Mr. Harry \n", "402 1294 1 Gibson, Miss. Dorothy Winifred \n", "403 1295 1 Carrau, Mr. Jose Pedro \n", "404 1296 1 Frauenthal, Mr. Isaac Gerald \n", "405 1297 2 Nourney, Mr. Alfred (Baron von Drachstedt\")\" \n", "406 1298 2 Ware, Mr. William Jeffery \n", "407 1299 1 Widener, Mr. George Dunton \n", "408 1300 3 Riordan, Miss. Johanna Hannah\"\" \n", "409 1301 3 Peacock, Miss. Treasteall \n", "410 1302 3 Naughton, Miss. Hannah \n", "411 1303 1 Minahan, Mrs. William Edward (Lillian E Thorpe) \n", "412 1304 3 Henriksson, Miss. Jenny Lovisa \n", "413 1305 3 Spector, Mr. Woolf \n", "414 1306 1 Oliva y Ocana, Dona. Fermina \n", "415 1307 3 Saether, Mr. Simon Sivertsen \n", "416 1308 3 Ware, Mr. Frederick \n", "417 1309 3 Peter, Master. Michael J \n", "\n", " Sex Age SibSp Parch Ticket Fare \\\n", "0 male 34.5 0 0 330911 7.8292 \n", "1 female 47.0 1 0 363272 7.0000 \n", "2 male 62.0 0 0 240276 9.6875 \n", "3 male 27.0 0 0 315154 8.6625 \n", "4 female 22.0 1 1 3101298 12.2875 \n", "5 male 14.0 0 0 7538 9.2250 \n", "6 female 30.0 0 0 330972 7.6292 \n", "7 male 26.0 1 1 248738 29.0000 \n", "8 female 18.0 0 0 2657 7.2292 \n", "9 male 21.0 2 0 A/4 48871 24.1500 \n", "10 male NaN 0 0 349220 7.8958 \n", "11 male 46.0 0 0 694 26.0000 \n", "12 female 23.0 1 0 21228 82.2667 \n", "13 male 63.0 1 0 24065 26.0000 \n", "14 female 47.0 1 0 W.E.P. 5734 61.1750 \n", "15 female 24.0 1 0 SC/PARIS 2167 27.7208 \n", "16 male 35.0 0 0 233734 12.3500 \n", "17 male 21.0 0 0 2692 7.2250 \n", "18 female 27.0 1 0 STON/O2. 3101270 7.9250 \n", "19 female 45.0 0 0 2696 7.2250 \n", "20 male 55.0 1 0 PC 17603 59.4000 \n", "21 male 9.0 0 1 C 17368 3.1708 \n", "22 female NaN 0 0 PC 17598 31.6833 \n", "23 male 21.0 0 1 PC 17597 61.3792 \n", "24 female 48.0 1 3 PC 17608 262.3750 \n", "25 male 50.0 1 0 A/5. 3337 14.5000 \n", "26 female 22.0 0 1 113509 61.9792 \n", "27 male 22.5 0 0 2698 7.2250 \n", "28 male 41.0 0 0 113054 30.5000 \n", "29 male NaN 2 0 2662 21.6792 \n", ".. ... ... ... ... ... ... \n", "388 male 21.0 0 0 364858 7.7500 \n", "389 male 6.0 3 1 349909 21.0750 \n", "390 male 23.0 0 0 12749 93.5000 \n", "391 female 51.0 0 1 PC 17592 39.4000 \n", "392 male 13.0 0 2 C.A. 2673 20.2500 \n", "393 male 47.0 0 0 C.A. 30769 10.5000 \n", "394 male 29.0 3 1 315153 22.0250 \n", "395 female 18.0 1 0 13695 60.0000 \n", "396 male 24.0 0 0 371109 7.2500 \n", "397 female 48.0 1 1 13567 79.2000 \n", "398 male 22.0 0 0 347065 7.7750 \n", "399 male 31.0 0 0 21332 7.7333 \n", "400 female 30.0 0 0 36928 164.8667 \n", "401 male 38.0 1 0 28664 21.0000 \n", "402 female 22.0 0 1 112378 59.4000 \n", "403 male 17.0 0 0 113059 47.1000 \n", "404 male 43.0 1 0 17765 27.7208 \n", "405 male 20.0 0 0 SC/PARIS 2166 13.8625 \n", "406 male 23.0 1 0 28666 10.5000 \n", "407 male 50.0 1 1 113503 211.5000 \n", "408 female NaN 0 0 334915 7.7208 \n", "409 female 3.0 1 1 SOTON/O.Q. 3101315 13.7750 \n", "410 female NaN 0 0 365237 7.7500 \n", "411 female 37.0 1 0 19928 90.0000 \n", "412 female 28.0 0 0 347086 7.7750 \n", "413 male NaN 0 0 A.5. 3236 8.0500 \n", "414 female 39.0 0 0 PC 17758 108.9000 \n", "415 male 38.5 0 0 SOTON/O.Q. 3101262 7.2500 \n", "416 male NaN 0 0 359309 8.0500 \n", "417 male NaN 1 1 2668 22.3583 \n", "\n", " Cabin Embarked Title \n", "0 NaN Q 1 \n", "1 NaN S 1 \n", "2 NaN Q 1 \n", "3 NaN S 1 \n", "4 NaN S 1 \n", "5 NaN S 1 \n", "6 NaN Q 1 \n", "7 NaN S 0 \n", "8 NaN C 1 \n", "9 NaN S 1 \n", "10 NaN S 1 \n", "11 NaN S 1 \n", "12 B45 S 1 \n", "13 NaN S 1 \n", "14 E31 S 1 \n", "15 NaN C 1 \n", "16 NaN Q 0 \n", "17 NaN C 1 \n", "18 NaN S 1 \n", "19 NaN C 1 \n", "20 NaN C 1 \n", "21 NaN S 1 \n", "22 NaN S 1 \n", "23 NaN C 1 \n", "24 B57 B59 B63 B66 C 1 \n", "25 NaN S 1 \n", "26 B36 C 1 \n", "27 NaN C 1 \n", "28 A21 S 1 \n", "29 NaN C 1 \n", ".. ... ... ... \n", "388 NaN Q 1 \n", "389 NaN S 1 \n", "390 B24 S 1 \n", "391 D28 S 1 \n", "392 NaN S 1 \n", "393 NaN S 1 \n", "394 NaN S 1 \n", "395 C31 S 1 \n", "396 NaN Q 1 \n", "397 B41 C 1 \n", "398 NaN S 1 \n", "399 NaN Q 1 \n", "400 C7 S 1 \n", "401 NaN S 1 \n", "402 NaN C 1 \n", "403 NaN S 1 \n", "404 D40 C 1 \n", "405 D38 C 1 \n", "406 NaN S 1 \n", "407 C80 C 0 \n", "408 NaN Q 1 \n", "409 NaN S 1 \n", "410 NaN Q 1 \n", "411 C78 Q 1 \n", "412 NaN S 1 \n", "413 NaN S 1 \n", "414 C105 C 1 \n", "415 NaN S 1 \n", "416 NaN S 1 \n", "417 NaN C 1 \n", "\n", "[418 rows x 12 columns]" ] }, "execution_count": 14, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "test" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 304 }, "colab_type": "code", "id": "z22z54N_PrqB", "outputId": "39e2833c-ed5a-4d1c-f188-5958737ba060" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:1: FutureWarning: \n", "Passing list-likes to .loc or [] with any missing label will raise\n", "KeyError in the future, you can use .reindex() as an alternative.\n", "\n", "See the documentation here:\n", "http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike\n", " \"\"\"Entry point for launching an IPython kernel.\n", "/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py:1367: FutureWarning: \n", "Passing list-likes to .loc or [] with any missing label will raise\n", "KeyError in the future, you can use .reindex() as an alternative.\n", "\n", "See the documentation here:\n", "http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike\n", " return self._getitem_tuple(key)\n" ] } ], "source": [ "#Writing to File\n", "submission=pd.DataFrame(test.loc[:,['PassengerId','Survived']])\n", "\n", "#Any files you save will be available in the output tab below\n", "submission.to_csv('submission.csv', index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "RKMZUdulPrqE" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "7mW1cqv2PrqH" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "08AvDFQoPrqJ" }, "outputs": [], "source": [] } ], "metadata": { "colab": { "name": "01-titanic-feature-creation.ipynb", "provenance": [], "version": "0.3.2" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }