!pip install numpy pandas matplotlib seaborn

Requirement already satisfied: numpy in /opt/anaconda3/lib/python3.12/site-packages (1.26.4)
Requirement already satisfied: pandas in /opt/anaconda3/lib/python3.12/site-packages (2.2.2)
Requirement already satisfied: matplotlib in /opt/anaconda3/lib/python3.12/site-packages (3.8.4)
Requirement already satisfied: seaborn in /opt/anaconda3/lib/python3.12/site-packages (0.13.2)
Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2023.3)
Requirement already satisfied: contourpy>=1.0.1 in /opt/anaconda3/lib/python3.12/site-packages (from matplotlib) (1.2.0)
Requirement already satisfied: cycler>=0.10 in /opt/anaconda3/lib/python3.12/site-packages (from matplotlib) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in /opt/anaconda3/lib/python3.12/site-packages (from matplotlib) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in /opt/anaconda3/lib/python3.12/site-packages (from matplotlib) (1.4.4)
Requirement already satisfied: packaging>=20.0 in /opt/anaconda3/lib/python3.12/site-packages (from matplotlib) (23.2)
Requirement already satisfied: pillow>=8 in /opt/anaconda3/lib/python3.12/site-packages (from matplotlib) (10.3.0)
Requirement already satisfied: pyparsing>=2.3.1 in /opt/anaconda3/lib/python3.12/site-packages (from matplotlib) (3.0.9)
Requirement already satisfied: six>=1.5 in /opt/anaconda3/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

# Load the survey responses from the CSV file in the working directory.
health_df = pd.read_csv("survey.csv")

# Preview the first rows to see what the columns look like.
health_df.head()

# Check the dtype pandas inferred for each column.
health_df.dtypes

Timestamp                    object
Age                           int64
Gender                       object
Country                      object
state                        object
self_employed                object
family_history               object
treatment                    object
work_interfere               object
no_employees                 object
remote_work                  object
tech_company                 object
benefits                     object
care_options                 object
wellness_program             object
seek_help                    object
anonymity                    object
leave                        object
mental_health_consequence    object
phys_health_consequence      object
coworkers                    object
supervisor                   object
mental_health_interview      object
phys_health_interview        object
mental_vs_physical           object
obs_consequence              object
comments                     object
dtype: object

# (rows, columns) of the raw survey data.
health_df.shape

(1259, 27)

# Inspect the column under study; the recorded output shows it contains NaN.
health_df['work_interfere']

0           Often
1          Rarely
2          Rarely
3           Often
4           Never
          ...    
1254          NaN
1255        Often
1256    Sometimes
1257          NaN
1258    Sometimes
Name: work_interfere, Length: 1259, dtype: object

# Rows where the respondent answered 'Often' for work_interfere.
answered_often = health_df['work_interfere'] == 'Often'
health_df[answered_often]

# Bar chart of the number of rows per work_interfere answer.
health_df.groupby('work_interfere').size().plot.bar()

<Axes: xlabel='work_interfere'>

import seaborn as sns

# work_interfere and remote_work are both categorical answers, so a line plot
# would join unordered category positions. Show the number of respondents per
# work_interfere answer, split by remote_work, as grouped bars instead.
sns.countplot(data=health_df, x='work_interfere', hue='remote_work')

<Axes: xlabel='work_interfere', ylabel='remote_work'>

# Summary statistics; the recorded output covers only the numeric Age column.
# NOTE(review): that output shows Age with min -1.726e+03 and max 1e+11, so
# Age needs cleaning before it is used in any analysis.
health_df.describe()

# Look at the work_interfere answers again before handling missing values.
health_df['work_interfere']

0           Often
1          Rarely
2          Rarely
3           Often
4           Never
          ...    
1254          NaN
1255        Often
1256    Sometimes
1257          NaN
1258    Sometimes
Name: work_interfere, Length: 1259, dtype: object

# Boolean frame marking every missing cell.
health_df.isna()

# Number of missing values in each column.
health_df.isna().sum()

Timestamp                       0
Age                             0
Gender                          0
Country                         0
state                         515
self_employed                  18
family_history                  0
treatment                       0
work_interfere                264
no_employees                    0
remote_work                     0
tech_company                    0
benefits                        0
care_options                    0
wellness_program                0
seek_help                       0
anonymity                       0
leave                           0
mental_health_consequence       0
phys_health_consequence         0
coworkers                       0
supervisor                      0
mental_health_interview         0
phys_health_interview           0
mental_vs_physical              0
obs_consequence                 0
comments                     1095
dtype: int64

# Row/column count before dropping rows, for comparison with cleaned_df.
health_df.shape

(1259, 27)

# Keep only the respondents who actually answered the work_interfere question.
required_answers = ['work_interfere']
cleaned_df = health_df.dropna(subset=required_answers)

# Row/column count after the drop.
cleaned_df.shape

(995, 27)

# Bar chart of how many rows fall under each work_interfere answer.
interference_counts = cleaned_df.groupby('work_interfere').size()
interference_counts.plot.bar()
plt.title('Mental Health Interference')
plt.xlabel('Does Mental Health Interfere with your Work?')
plt.ylabel('Number of Patients')

Text(0, 0.5, 'Number of Patients')

# Subset of respondents whose work_interfere answer is 'Sometimes'.
answered_sometimes = cleaned_df['work_interfere'] == 'Sometimes'
sometimes_df = cleaned_df[answered_sometimes]

# Preview the subset.
sometimes_df.head()

# Size of the subset.
sometimes_df.shape

(465, 27)

# Within the 'Sometimes' subset, count rows per remote_work answer and plot.
sometimes_by_remote = sometimes_df.groupby('remote_work').size()
sometimes_by_remote.plot.bar()
plt.title('Sometimes Feel Mental Health Interference')
plt.xlabel('Do You Work from Home?')
plt.ylabel('Number of Patients')

Text(0, 0.5, 'Number of Patients')

# Subset of respondents whose work_interfere answer is 'Often'.
answered_often = cleaned_df['work_interfere'] == 'Often'
often_df = cleaned_df[answered_often]

# Size of the subset.
often_df.shape

(144, 27)

# Within the 'Often' subset, count rows per remote_work answer and plot.
often_by_remote = often_df.groupby('remote_work').size()
often_by_remote.plot.bar()
plt.title('Often Feel Mental Health Interference')
plt.xlabel('Do You Work from Home?')
plt.ylabel('Number of Patients')

Text(0, 0.5, 'Number of Patients')

# Subset of respondents whose work_interfere answer is 'Rarely'.
answered_rarely = cleaned_df['work_interfere'] == 'Rarely'
Rarely_df = cleaned_df[answered_rarely]

# Count rows per remote_work answer within that subset and plot.
rarely_by_remote = Rarely_df.groupby('remote_work').size()
rarely_by_remote.plot.bar()
plt.title('Rarely Feel Mental Health Interference')
plt.xlabel('Do You Work from Home?')
plt.ylabel('Number of Patients')

Text(0, 0.5, 'Number of Patients')

# Subset of respondents whose work_interfere answer is 'Never'.
Never_df = cleaned_df[cleaned_df['work_interfere'] == 'Never']

# Count rows per remote_work answer within that subset and plot. The title
# names the 'Never' group so the chart is not confused with the 'Rarely' one.
Never_df.groupby('remote_work').size().plot(kind='bar')
plt.title('Never Feel Mental Health Interference')
plt.xlabel('Do You Work from Home?')
plt.ylabel('Number of Patients')

Text(0, 0.5, 'Number of Patients')

# Count rows per remote_work answer within each interference subset.
sometimes_number = sometimes_df.groupby('remote_work').size()
often_number = often_df.groupby('remote_work').size()
rarely_number = Rarely_df.groupby('remote_work').size()

# One column per interference frequency, indexed by the remote_work answer.
all_df = pd.DataFrame({
    'Sometimes': sometimes_number,
    'Often': often_number,
    'Rarely': rarely_number
})

# Plot the frame built above. all_df is the only combined frame defined here;
# referring to any other name would raise NameError on a fresh run.
all_df.plot(kind='bar')

plt.title('Mental Health Interference by Work from Home Status')
plt.xlabel('Do You Work from Home?')
plt.ylabel('Number of Patients')

plt.legend(title='Frequency of Mental Health Interference')

<matplotlib.legend.Legend at 0x1333313d0>

# Count rows per self_employed answer within each interference subset.
sometimes_number_1, often_number_1, rarely_number_1 = (
    subset.groupby('self_employed').size()
    for subset in (sometimes_df, often_df, Rarely_df)
)

# One column per interference frequency, indexed by the self_employed answer.
all_df = pd.DataFrame(
    {
        'Sometimes': sometimes_number_1,
        'Often': often_number_1,
        'Rarely': rarely_number_1,
    }
)

all_df.plot.bar()

plt.title('Mental Health Interference by Work from Self Employed')
plt.xlabel('Are you self Employed?')
plt.ylabel('Number of Patients')

plt.legend(title='Frequency of Mental Health Interference')

<matplotlib.legend.Legend at 0x12e2f0170>

# Count rows per tech_company answer within each interference subset.
sometimes_number, often_number, rarely_number = (
    subset.groupby('tech_company').size()
    for subset in (sometimes_df, often_df, Rarely_df)
)

# One column per interference frequency, indexed by the tech_company answer.
all_df = pd.DataFrame(
    {
        'Sometimes': sometimes_number,
        'Often': often_number,
        'Rarely': rarely_number,
    }
)

all_df.plot.bar()

plt.title('Mental Health Interference by Work for Tech Company')
plt.xlabel('Is your employer primarily a tech company/organization?')
plt.ylabel('Number of Patients')

plt.legend(title='Frequency of Mental Health Interference')

<matplotlib.legend.Legend at 0x12e578320>

	Timestamp	Age	Gender	Country	state	self_employed	family_history	treatment	work_interfere	no_employees	...	leave	mental_health_consequence	phys_health_consequence	coworkers	supervisor	mental_health_interview	phys_health_interview	mental_vs_physical	obs_consequence	comments
0	2014-08-27 11:29:31	37	Female	United States	IL	NaN	No	Yes	Often	6-25	...	Somewhat easy	No	No	Some of them	Yes	No	Maybe	Yes	No	NaN
1	2014-08-27 11:29:37	44	M	United States	IN	NaN	No	No	Rarely	More than 1000	...	Don't know	Maybe	No	No	No	No	No	Don't know	No	NaN
2	2014-08-27 11:29:44	32	Male	Canada	NaN	NaN	No	No	Rarely	6-25	...	Somewhat difficult	No	No	Yes	Yes	Yes	Yes	No	No	NaN
3	2014-08-27 11:29:46	31	Male	United Kingdom	NaN	NaN	Yes	Yes	Often	26-100	...	Somewhat difficult	Yes	Yes	Some of them	No	Maybe	Maybe	No	Yes	NaN
4	2014-08-27 11:30:22	31	Male	United States	TX	NaN	No	No	Never	100-500	...	Don't know	No	No	Some of them	Yes	Yes	Yes	Don't know	No	NaN

	Timestamp	Age	Gender	Country	state	self_employed	family_history	treatment	work_interfere	no_employees	...	leave	mental_health_consequence	phys_health_consequence	coworkers	supervisor	mental_health_interview	phys_health_interview	mental_vs_physical	obs_consequence	comments
0	2014-08-27 11:29:31	37	Female	United States	IL	NaN	No	Yes	Often	6-25	...	Somewhat easy	No	No	Some of them	Yes	No	Maybe	Yes	No	NaN
3	2014-08-27 11:29:46	31	Male	United Kingdom	NaN	NaN	Yes	Yes	Often	26-100	...	Somewhat difficult	Yes	Yes	Some of them	No	Maybe	Maybe	No	Yes	NaN
22	2014-08-27 11:35:48	46	Male	United States	MA	No	No	Yes	Often	26-100	...	Don't know	Maybe	No	Some of them	Yes	No	Maybe	No	No	NaN
43	2014-08-27 11:43:10	18	Male	Netherlands	NaN	No	No	No	Often	6-25	...	Somewhat difficult	Yes	Maybe	No	Some of them	No	No	No	No	NaN
50	2014-08-27 11:44:55	22	M	United States	TX	No	Yes	Yes	Often	6-25	...	Very difficult	Maybe	No	No	No	No	Maybe	Don't know	No	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1245	2015-05-06 10:14:50	22	Male	Australia	NaN	No	Yes	Yes	Often	100-500	...	Don't know	Maybe	Maybe	No	Yes	No	Maybe	Don't know	Yes	In australia all organisations of a certain si...
1247	2015-05-07 10:08:50	36	male	Finland	NaN	No	No	Yes	Often	6-25	...	Very difficult	Yes	No	Some of them	No	No	Maybe	Don't know	Yes	NaN
1250	2015-07-27 23:25:34	30	Male	United States	CA	Yes	Yes	Yes	Often	26-100	...	Don't know	No	No	Some of them	Yes	Maybe	Maybe	Yes	No	NaN
1251	2015-08-17 09:38:35	36	Male	South Africa	NaN	No	Yes	Yes	Often	100-500	...	Somewhat easy	No	No	Some of them	Yes	No	Yes	Yes	No	NaN
1255	2015-09-26 01:07:35	32	Male	United States	IL	No	Yes	Yes	Often	26-100	...	Somewhat difficult	No	No	Some of them	Yes	No	No	Yes	No	NaN

	Age
count	1.259000e+03
mean	7.942815e+07
std	2.818299e+09
min	-1.726000e+03
25%	2.700000e+01
50%	3.100000e+01
75%	3.600000e+01
max	1.000000e+11

	Timestamp	Age	Gender	Country	state	self_employed	family_history	treatment	work_interfere	no_employees	...	leave	mental_health_consequence	phys_health_consequence	coworkers	supervisor	mental_health_interview	phys_health_interview	mental_vs_physical	obs_consequence	comments
0	False	False	False	False	False	True	False	False	False	False	...	False	False	False	False	False	False	False	False	False	True
1	False	False	False	False	False	True	False	False	False	False	...	False	False	False	False	False	False	False	False	False	True
2	False	False	False	False	True	True	False	False	False	False	...	False	False	False	False	False	False	False	False	False	True
3	False	False	False	False	True	True	False	False	False	False	...	False	False	False	False	False	False	False	False	False	True
4	False	False	False	False	False	True	False	False	False	False	...	False	False	False	False	False	False	False	False	False	True
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1254	False	False	False	False	True	False	False	False	True	False	...	False	False	False	False	False	False	False	False	False	True
1255	False	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	True
1256	False	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	True
1257	False	False	False	False	False	False	False	False	True	False	...	False	False	False	False	False	False	False	False	False	True
1258	False	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	True

	Timestamp	Age	Gender	Country	state	self_employed	family_history	treatment	work_interfere	no_employees	...	leave	mental_health_consequence	phys_health_consequence	coworkers	supervisor	mental_health_interview	phys_health_interview	mental_vs_physical	obs_consequence	comments
5	2014-08-27 11:31:22	33	Male	United States	TN	NaN	Yes	No	Sometimes	6-25	...	Don't know	No	No	Yes	Yes	No	Maybe	Don't know	No	NaN
6	2014-08-27 11:31:50	35	Female	United States	MI	NaN	Yes	Yes	Sometimes	1-5	...	Somewhat difficult	Maybe	Maybe	Some of them	No	No	No	Don't know	No	NaN
8	2014-08-27 11:32:39	42	Female	United States	IL	NaN	Yes	Yes	Sometimes	100-500	...	Very difficult	Maybe	No	Yes	Yes	No	Maybe	No	No	NaN
10	2014-08-27 11:32:44	31	Male	United States	OH	NaN	No	Yes	Sometimes	6-25	...	Don't know	No	No	Some of them	Yes	No	No	Don't know	No	NaN
12	2014-08-27 11:33:23	42	female	United States	CA	NaN	Yes	Yes	Sometimes	26-100	...	Somewhat difficult	Yes	Yes	Yes	Yes	Maybe	Maybe	No	Yes	NaN