# using pandas to load in data from a URL
import pandas as pd
url="https://docs.google.com/spreadsheets/d/1VMaw2oCn0ABitd-alLAEsEhGS1Je2UFNLu76TKrIH7w/gviz/tq?tqx=out:csv&sheet=Raw_Medical_Data_for_day1"
df = pd.read_csv(url)
# initial peek into Data
df
Unnamed: 0 | State | Lat | Lng | Area | Children | Age | Income | Marital | Gender | ... | Hyperlipidemia | BackPain | Anxiety | Allergic_rhinitis | Reflux_esophagitis | Asthma | Services | Initial_days | TotalCharge | Additional_charges | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | AL | 34.34960 | -86.72508 | Suburban | 1.0 | 53 | 86575.93 | Divorced | Male | ... | 0.0 | 1.0 | 1.0 | 1.0 | 0 | 1 | Blood Work | 10.585770 | 3726.702860 | 17939.403420 |
1 | 2 | FL | 30.84513 | -85.22907 | Urban | 3.0 | 51 | 46805.99 | Married | Female | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1 | 0 | Intravenous | 15.129562 | 4193.190458 | 17612.998120 |
2 | 3 | SD | 43.54321 | -96.63772 | Suburban | 3.0 | 53 | 14370.14 | Widowed | Female | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | Blood Work | 4.772177 | 2434.234222 | 17505.192460 |
3 | 4 | MN | 43.89744 | -93.51479 | Suburban | 0.0 | 78 | 39741.49 | Married | Male | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1 | 1 | Blood Work | 1.714879 | 2127.830423 | 12993.437350 |
4 | 5 | VA | 37.59894 | -76.88958 | Rural | 1.0 | 22 | 1209.56 | Widowed | Female | ... | 1.0 | 0.0 | 0.0 | 1.0 | 0 | 0 | CT Scan | 1.254807 | 2113.073274 | 3716.525786 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
995 | 996 | VA | 37.13892 | -81.06430 | Suburban | 3.0 | 23 | 37655.11 | Separated | Male | ... | 1.0 | 0.0 | 0.0 | 0.0 | 1 | 0 | Blood Work | 1.669799 | 2747.196682 | 8480.414406 |
996 | 997 | MI | 42.20472 | -83.20589 | Urban | 2.0 | 76 | 33475.93 | Divorced | Male | ... | NaN | 1.0 | 1.0 | 1.0 | 0 | 1 | Intravenous | 11.917145 | 3661.760740 | 25260.816290 |
997 | 998 | AZ | 35.04266 | -110.32518 | Urban | 0.0 | 67 | 21360.91 | Married | Male | ... | 1.0 | 1.0 | 0.0 | 0.0 | 0 | 0 | Intravenous | 30.858920 | 5259.932456 | 22317.047440 |
998 | 999 | NC | 36.11344 | -80.63370 | Suburban | 0.0 | 65 | 23473.98 | Never Married | Male | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 1 | Blood Work | 7.856833 | 3083.731857 | 11225.387230 |
999 | 1000 | ND | 46.37119 | -98.49156 | Urban | 1.0 | 73 | 70649.05 | Divorced | Female | ... | 0.0 | 1.0 | 0.0 | 0.0 | 0 | 0 | Intravenous | 4.599411 | 2830.020820 | 12326.213460 |
1000 rows × 33 columns
# filtering the Data for entries with multiple children(more than 2)
many_children= df['Children'] > 3
many_children
0 False 1 True 2 True 3 False 4 False ... 995 True 996 True 997 False 998 False 999 False Name: Children, Length: 1000, dtype: bool
filtered_df= df[many_children]
filtered_df
Unnamed: 0 | State | Lat | Lng | Area | Children | Age | Income | Marital | Gender | ... | Hyperlipidemia | BackPain | Anxiety | Allergic_rhinitis | Reflux_esophagitis | Asthma | Services | Initial_days | TotalCharge | Additional_charges | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2 | FL | 30.84513 | -85.22907 | Urban | 3.0 | 51 | 46805.99 | Married | Female | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1 | 0 | Intravenous | 15.129562 | 4193.190458 | 17612.998120 |
2 | 3 | SD | 43.54321 | -96.63772 | Suburban | 3.0 | 53 | 14370.14 | Widowed | Female | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | Blood Work | 4.772177 | 2434.234222 | 17505.192460 |
5 | 6 | OK | 35.67302 | -95.19180 | Urban | 3.0 | 76 | 81999.88 | Never Married | Male | ... | 0.0 | 1.0 | 0.0 | 1.0 | 0 | 0 | Blood Work | 5.957250 | 2636.691180 | 12742.589910 |
7 | 8 | VA | 39.08062 | -78.39150 | Urban | 7.0 | 40 | 38319.29 | Divorced | Female | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | Intravenous | 14.228019 | 3021.499039 | 6930.572138 |
9 | 10 | MN | 45.40325 | -94.71424 | Urban | 2.0 | 78 | 38965.22 | Never Married | Female | ... | 0.0 | 0.0 | 0.0 | 1.0 | 1 | 1 | Blood Work | 1.632554 | 3147.855813 | 26225.989910 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
991 | 992 | AL | 34.96594 | -87.12179 | Urban | 5.0 | 78 | 22669.31 | Married | Male | ... | 1.0 | 1.0 | 0.0 | 0.0 | 1 | 0 | Blood Work | 5.694359 | 3073.408768 | 13466.864900 |
992 | 993 | OH | 38.98139 | -81.89771 | Urban | 2.0 | 30 | 68421.30 | Never Married | Female | ... | 1.0 | 1.0 | 0.0 | 0.0 | 0 | 0 | Blood Work | 35.269743 | 4996.699423 | 5132.506653 |
994 | 995 | AL | 33.87665 | -88.13869 | Rural | 3.0 | 59 | 40160.19 | Widowed | Male | ... | 1.0 | 1.0 | 0.0 | 0.0 | 1 | 1 | Blood Work | 13.894693 | 3892.480540 | 11055.427760 |
995 | 996 | VA | 37.13892 | -81.06430 | Suburban | 3.0 | 23 | 37655.11 | Separated | Male | ... | 1.0 | 0.0 | 0.0 | 0.0 | 1 | 0 | Blood Work | 1.669799 | 2747.196682 | 8480.414406 |
996 | 997 | MI | 42.20472 | -83.20589 | Urban | 2.0 | 76 | 33475.93 | Divorced | Male | ... | NaN | 1.0 | 1.0 | 1.0 | 0 | 1 | Intravenous | 11.917145 | 3661.760740 | 25260.816290 |
470 rows × 33 columns
# additionally filter rows that live in the state of FLORIDA.
floridians= filtered_df['State'].str.contains('FL')
# apply filter to Data
filtered_df.loc[floridians]
Unnamed: 0 | State | Lat | Lng | Area | Children | Age | Income | Marital | Gender | ... | Hyperlipidemia | BackPain | Anxiety | Allergic_rhinitis | Reflux_esophagitis | Asthma | Services | Initial_days | TotalCharge | Additional_charges | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2 | FL | 30.84513 | -85.22907 | Urban | 3.0 | 51 | 46805.99 | Married | Female | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1 | 0 | Intravenous | 15.129562 | 4193.190458 | 17612.998120 |
58 | 59 | FL | 30.23936 | -85.65202 | Rural | 4.0 | 86 | 132963.95 | Never Married | Male | ... | 1.0 | 0.0 | 0.0 | 1.0 | 0 | 0 | CT Scan | 8.298685 | 2762.184305 | 14397.946210 |
129 | 130 | FL | 28.83557 | -82.27173 | Suburban | 3.0 | 45 | 10479.91 | Divorced | Female | ... | 1.0 | 0.0 | 0.0 | 0.0 | 1 | 0 | Blood Work | 10.763288 | 2891.276754 | 7701.050429 |
156 | 157 | FL | 25.77789 | -80.17412 | Urban | 10.0 | 70 | 12736.92 | Married | Female | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 1 | Blood Work | 5.146599 | 2349.337935 | 11840.359160 |
166 | 167 | FL | 28.69295 | -81.79654 | Rural | 3.0 | 19 | 56007.17 | Never Married | Male | ... | 0.0 | 1.0 | 0.0 | 1.0 | 1 | 0 | Intravenous | 11.566498 | 3268.304281 | 6698.594263 |
198 | 199 | FL | 26.53400 | -80.08297 | Suburban | 3.0 | 19 | 80233.71 | Widowed | Male | ... | 1.0 | 1.0 | 0.0 | 1.0 | 1 | 1 | CT Scan | 22.757338 | 4532.093567 | 4560.232892 |
263 | 264 | FL | 30.67981 | -87.19822 | Suburban | 4.0 | 48 | 7218.62 | Divorced | Female | ... | 1.0 | 0.0 | 0.0 | 1.0 | 0 | 1 | Intravenous | 8.542972 | 3410.099411 | 16344.886300 |
412 | 413 | FL | 28.58554 | -81.68218 | Rural | 4.0 | 79 | 16012.05 | Never Married | Female | ... | 0.0 | 0.0 | 1.0 | 0.0 | 1 | 0 | Intravenous | 4.380309 | 2921.041891 | 13582.895340 |
422 | 423 | FL | 25.98245 | -80.34359 | Urban | 4.0 | 81 | 4124.82 | Separated | Female | ... | 0.0 | 0.0 | 1.0 | 1.0 | 0 | 1 | Intravenous | 2.445696 | 2691.473859 | 13900.778910 |
448 | 449 | FL | 29.02018 | -81.90951 | Urban | 2.0 | 80 | 67799.74 | Widowed | Female | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0 | 0 | Intravenous | 2.223536 | 2823.841624 | 26402.635950 |
817 | 818 | FL | 30.35770 | -83.98848 | Rural | 2.0 | 79 | 45372.00 | Married | Female | ... | 0.0 | 1.0 | 0.0 | 0.0 | 1 | 0 | Intravenous | 10.587203 | 2868.004550 | 12906.990430 |
933 | 934 | FL | 27.24900 | -80.38129 | Suburban | 4.0 | 55 | 69278.99 | Divorced | Female | ... | 0.0 | 1.0 | 0.0 | 1.0 | 1 | 0 | Blood Work | 20.375339 | 4314.878422 | 10286.372530 |
12 rows × 33 columns
married_or_not= filtered_df['Marital'].str.contains('Married')
filtered_df.loc[married_or_not]
Unnamed: 0 | State | Lat | Lng | Area | Children | Age | Income | Marital | Gender | ... | Hyperlipidemia | BackPain | Anxiety | Allergic_rhinitis | Reflux_esophagitis | Asthma | Services | Initial_days | TotalCharge | Additional_charges | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2 | FL | 30.84513 | -85.22907 | Urban | 3.0 | 51 | 46805.99 | Married | Female | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1 | 0 | Intravenous | 15.129562 | 4193.190458 | 17612.998120 |
5 | 6 | OK | 35.67302 | -95.19180 | Urban | 3.0 | 76 | 81999.88 | Never Married | Male | ... | 0.0 | 1.0 | 0.0 | 1.0 | 0 | 0 | Blood Work | 5.957250 | 2636.691180 | 12742.589910 |
9 | 10 | MN | 45.40325 | -94.71424 | Urban | 2.0 | 78 | 38965.22 | Never Married | Female | ... | 0.0 | 0.0 | 0.0 | 1.0 | 1 | 1 | Blood Work | 1.632554 | 3147.855813 | 26225.989910 |
10 | 11 | MS | 32.59914 | -88.67903 | Urban | 4.0 | 55 | 38503.82 | Married | Female | ... | 0.0 | 1.0 | 0.0 | 0.0 | 1 | 1 | Intravenous | 2.595912 | 2837.861788 | 18518.043170 |
28 | 29 | OH | 41.45764 | -81.02602 | Urban | 2.0 | 69 | 23640.03 | Married | Male | ... | 0.0 | 0.0 | 1.0 | 1.0 | 0 | 0 | Intravenous | 13.486144 | 3804.004093 | 22890.096790 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
965 | 966 | AK | 63.75233 | -171.68815 | Urban | 2.0 | 18 | 52978.87 | Married | Male | ... | 1.0 | 1.0 | 0.0 | 1.0 | 0 | 0 | Intravenous | 9.565067 | 3466.670408 | 4308.496743 |
968 | 969 | IA | 42.16570 | -92.29954 | Rural | 3.0 | 19 | 70131.73 | Never Married | Male | ... | 1.0 | 0.0 | 0.0 | 0.0 | 1 | 0 | Intravenous | 11.920815 | 3682.719334 | 7033.580425 |
969 | 970 | AR | 36.17748 | -90.81964 | Suburban | 4.0 | 56 | 45774.65 | Married | Female | ... | 1.0 | 1.0 | 0.0 | 1.0 | 1 | 0 | CT Scan | 4.317152 | 3093.095379 | 9821.010019 |
991 | 992 | AL | 34.96594 | -87.12179 | Urban | 5.0 | 78 | 22669.31 | Married | Male | ... | 1.0 | 1.0 | 0.0 | 0.0 | 1 | 0 | Blood Work | 5.694359 | 3073.408768 | 13466.864900 |
992 | 993 | OH | 38.98139 | -81.89771 | Urban | 2.0 | 30 | 68421.30 | Never Married | Female | ... | 1.0 | 1.0 | 0.0 | 0.0 | 0 | 0 | Blood Work | 35.269743 | 4996.699423 | 5132.506653 |
184 rows × 33 columns
married= filtered_df['Marital']=='Married'
filtered_df.loc[married]
Unnamed: 0 | State | Lat | Lng | Area | Children | Age | Income | Marital | Gender | ... | Hyperlipidemia | BackPain | Anxiety | Allergic_rhinitis | Reflux_esophagitis | Asthma | Services | Initial_days | TotalCharge | Additional_charges | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2 | FL | 30.84513 | -85.22907 | Urban | 3.0 | 51 | 46805.99 | Married | Female | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1 | 0 | Intravenous | 15.129562 | 4193.190458 | 17612.998120 |
10 | 11 | MS | 32.59914 | -88.67903 | Urban | 4.0 | 55 | 38503.82 | Married | Female | ... | 0.0 | 1.0 | 0.0 | 0.0 | 1 | 1 | Intravenous | 2.595912 | 2837.861788 | 18518.043170 |
28 | 29 | OH | 41.45764 | -81.02602 | Urban | 2.0 | 69 | 23640.03 | Married | Male | ... | 0.0 | 0.0 | 1.0 | 1.0 | 0 | 0 | Intravenous | 13.486144 | 3804.004093 | 22890.096790 |
39 | 40 | PA | 40.94827 | -78.32867 | Suburban | 3.0 | 70 | 13059.24 | Married | Female | ... | 0.0 | 1.0 | 0.0 | 0.0 | 0 | 0 | Intravenous | 10.452610 | 2872.499307 | 11658.486190 |
96 | 97 | MD | 38.34475 | -75.58258 | Rural | 4.0 | 82 | 6172.28 | Married | Male | ... | 0.0 | NaN | 0.0 | 0.0 | 1 | 0 | Blood Work | 2.368128 | 2693.679414 | 14078.404620 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
954 | 955 | MS | 34.19267 | -89.42174 | Suburban | 4.0 | 77 | 32513.06 | Married | Female | ... | 1.0 | 0.0 | 0.0 | 0.0 | 1 | 0 | Intravenous | 1.311287 | 2629.123428 | 13060.891880 |
962 | 963 | TX | 29.35664 | -99.87147 | Urban | 4.0 | 79 | 138456.69 | Married | Female | ... | 1.0 | 1.0 | 1.0 | 0.0 | 0 | 0 | CT Scan | 3.372042 | 2981.508145 | 13507.818050 |
965 | 966 | AK | 63.75233 | -171.68815 | Urban | 2.0 | 18 | 52978.87 | Married | Male | ... | 1.0 | 1.0 | 0.0 | 1.0 | 0 | 0 | Intravenous | 9.565067 | 3466.670408 | 4308.496743 |
969 | 970 | AR | 36.17748 | -90.81964 | Suburban | 4.0 | 56 | 45774.65 | Married | Female | ... | 1.0 | 1.0 | 0.0 | 1.0 | 1 | 0 | CT Scan | 4.317152 | 3093.095379 | 9821.010019 |
991 | 992 | AL | 34.96594 | -87.12179 | Urban | 5.0 | 78 | 22669.31 | Married | Male | ... | 1.0 | 1.0 | 0.0 | 0.0 | 1 | 0 | Blood Work | 5.694359 | 3073.408768 | 13466.864900 |
101 rows × 33 columns
filtered_df[~married]
Unnamed: 0 | State | Lat | Lng | Area | Children | Age | Income | Marital | Gender | ... | Hyperlipidemia | BackPain | Anxiety | Allergic_rhinitis | Reflux_esophagitis | Asthma | Services | Initial_days | TotalCharge | Additional_charges | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2 | 3 | SD | 43.54321 | -96.63772 | Suburban | 3.0 | 53 | 14370.14 | Widowed | Female | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | Blood Work | 4.772177 | 2434.234222 | 17505.192460 |
5 | 6 | OK | 35.67302 | -95.19180 | Urban | 3.0 | 76 | 81999.88 | Never Married | Male | ... | 0.0 | 1.0 | 0.0 | 1.0 | 0 | 0 | Blood Work | 5.957250 | 2636.691180 | 12742.589910 |
7 | 8 | VA | 39.08062 | -78.39150 | Urban | 7.0 | 40 | 38319.29 | Divorced | Female | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | Intravenous | 14.228019 | 3021.499039 | 6930.572138 |
9 | 10 | MN | 45.40325 | -94.71424 | Urban | 2.0 | 78 | 38965.22 | Never Married | Female | ... | 0.0 | 0.0 | 0.0 | 1.0 | 1 | 1 | Blood Work | 1.632554 | 3147.855813 | 26225.989910 |
16 | 17 | CA | 33.74647 | -114.66793 | Suburban | 10.0 | 44 | 24250.51 | Divorced | Male | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | Intravenous | 7.038432 | 2944.722978 | 8157.752223 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
990 | 991 | VA | 37.13929 | -82.33459 | Rural | 4.0 | 40 | 16994.89 | Widowed | Male | ... | 0.0 | 1.0 | 1.0 | 0.0 | 1 | 0 | Intravenous | 17.345927 | 4095.440620 | 7405.949286 |
992 | 993 | OH | 38.98139 | -81.89771 | Urban | 2.0 | 30 | 68421.30 | Never Married | Female | ... | 1.0 | 1.0 | 0.0 | 0.0 | 0 | 0 | Blood Work | 35.269743 | 4996.699423 | 5132.506653 |
994 | 995 | AL | 33.87665 | -88.13869 | Rural | 3.0 | 59 | 40160.19 | Widowed | Male | ... | 1.0 | 1.0 | 0.0 | 0.0 | 1 | 1 | Blood Work | 13.894693 | 3892.480540 | 11055.427760 |
995 | 996 | VA | 37.13892 | -81.06430 | Suburban | 3.0 | 23 | 37655.11 | Separated | Male | ... | 1.0 | 0.0 | 0.0 | 0.0 | 1 | 0 | Blood Work | 1.669799 | 2747.196682 | 8480.414406 |
996 | 997 | MI | 42.20472 | -83.20589 | Urban | 2.0 | 76 | 33475.93 | Divorced | Male | ... | NaN | 1.0 | 1.0 | 1.0 | 0 | 1 | Intravenous | 11.917145 | 3661.760740 | 25260.816290 |
369 rows × 33 columns