With this dataset, I would like to find out how to maintain a healthy lifestyle while pursuing a tech job. I would like to inform others and myself about what makes a tech job ideal for one's mental health and how to maintain a healthy work-life balance.

InĀ [9]:
!pip install numpy pandas matplotlib seaborn
Requirement already satisfied: numpy in /opt/anaconda3/lib/python3.12/site-packages (1.26.4)
Requirement already satisfied: pandas in /opt/anaconda3/lib/python3.12/site-packages (2.2.2)
Requirement already satisfied: matplotlib in /opt/anaconda3/lib/python3.12/site-packages (3.8.4)
Requirement already satisfied: seaborn in /opt/anaconda3/lib/python3.12/site-packages (0.13.2)
Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2023.3)
Requirement already satisfied: contourpy>=1.0.1 in /opt/anaconda3/lib/python3.12/site-packages (from matplotlib) (1.2.0)
Requirement already satisfied: cycler>=0.10 in /opt/anaconda3/lib/python3.12/site-packages (from matplotlib) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in /opt/anaconda3/lib/python3.12/site-packages (from matplotlib) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in /opt/anaconda3/lib/python3.12/site-packages (from matplotlib) (1.4.4)
Requirement already satisfied: packaging>=20.0 in /opt/anaconda3/lib/python3.12/site-packages (from matplotlib) (23.2)
Requirement already satisfied: pillow>=8 in /opt/anaconda3/lib/python3.12/site-packages (from matplotlib) (10.3.0)
Requirement already satisfied: pyparsing>=2.3.1 in /opt/anaconda3/lib/python3.12/site-packages (from matplotlib) (3.0.9)
Requirement already satisfied: six>=1.5 in /opt/anaconda3/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)
InĀ [10]:
import numpy as np
InĀ [11]:
import pandas as pd
InĀ [12]:
import matplotlib.pyplot as plt
InĀ [13]:
# Load the raw survey responses from the CSV next to this notebook.
health_df = pd.read_csv(filepath_or_buffer="survey.csv")
InĀ [8]:
# Preview the first five survey responses.
health_df.head(n=5)
Out[8]:
Timestamp Age Gender Country state self_employed family_history treatment work_interfere no_employees ... leave mental_health_consequence phys_health_consequence coworkers supervisor mental_health_interview phys_health_interview mental_vs_physical obs_consequence comments
0 2014-08-27 11:29:31 37 Female United States IL NaN No Yes Often 6-25 ... Somewhat easy No No Some of them Yes No Maybe Yes No NaN
1 2014-08-27 11:29:37 44 M United States IN NaN No No Rarely More than 1000 ... Don't know Maybe No No No No No Don't know No NaN
2 2014-08-27 11:29:44 32 Male Canada NaN NaN No No Rarely 6-25 ... Somewhat difficult No No Yes Yes Yes Yes No No NaN
3 2014-08-27 11:29:46 31 Male United Kingdom NaN NaN Yes Yes Often 26-100 ... Somewhat difficult Yes Yes Some of them No Maybe Maybe No Yes NaN
4 2014-08-27 11:30:22 31 Male United States TX NaN No No Never 100-500 ... Don't know No No Some of them Yes Yes Yes Don't know No NaN

5 rows Ɨ 27 columns

InĀ [9]:
# Show the dtype pandas inferred for every column of the raw frame.
health_df.dtypes
Out[9]:
Timestamp                    object
Age                           int64
Gender                       object
Country                      object
state                        object
self_employed                object
family_history               object
treatment                    object
work_interfere               object
no_employees                 object
remote_work                  object
tech_company                 object
benefits                     object
care_options                 object
wellness_program             object
seek_help                    object
anonymity                    object
leave                        object
mental_health_consequence    object
phys_health_consequence      object
coworkers                    object
supervisor                   object
mental_health_interview      object
phys_health_interview        object
mental_vs_physical           object
obs_consequence              object
comments                     object
dtype: object
InĀ [10]:
# (rows, columns) of the raw frame.
health_df.shape
Out[10]:
(1259, 27)
InĀ [11]:
# Inspect the work_interfere answers for every respondent.
health_df.loc[:, 'work_interfere']
Out[11]:
0           Often
1          Rarely
2          Rarely
3           Often
4           Never
          ...    
1254          NaN
1255        Often
1256    Sometimes
1257          NaN
1258    Sometimes
Name: work_interfere, Length: 1259, dtype: object
InĀ [15]:
# Rows whose work_interfere answer is exactly 'Often' (missing answers never match).
health_df.loc[health_df['work_interfere'].eq('Often')]
Out[15]:
Timestamp Age Gender Country state self_employed family_history treatment work_interfere no_employees ... leave mental_health_consequence phys_health_consequence coworkers supervisor mental_health_interview phys_health_interview mental_vs_physical obs_consequence comments
0 2014-08-27 11:29:31 37 Female United States IL NaN No Yes Often 6-25 ... Somewhat easy No No Some of them Yes No Maybe Yes No NaN
3 2014-08-27 11:29:46 31 Male United Kingdom NaN NaN Yes Yes Often 26-100 ... Somewhat difficult Yes Yes Some of them No Maybe Maybe No Yes NaN
22 2014-08-27 11:35:48 46 Male United States MA No No Yes Often 26-100 ... Don't know Maybe No Some of them Yes No Maybe No No NaN
43 2014-08-27 11:43:10 18 Male Netherlands NaN No No No Often 6-25 ... Somewhat difficult Yes Maybe No Some of them No No No No NaN
50 2014-08-27 11:44:55 22 M United States TX No Yes Yes Often 6-25 ... Very difficult Maybe No No No No Maybe Don't know No NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1245 2015-05-06 10:14:50 22 Male Australia NaN No Yes Yes Often 100-500 ... Don't know Maybe Maybe No Yes No Maybe Don't know Yes In australia all organisations of a certain si...
1247 2015-05-07 10:08:50 36 male Finland NaN No No Yes Often 6-25 ... Very difficult Yes No Some of them No No Maybe Don't know Yes NaN
1250 2015-07-27 23:25:34 30 Male United States CA Yes Yes Yes Often 26-100 ... Don't know No No Some of them Yes Maybe Maybe Yes No NaN
1251 2015-08-17 09:38:35 36 Male South Africa NaN No Yes Yes Often 100-500 ... Somewhat easy No No Some of them Yes No Yes Yes No NaN
1255 2015-09-26 01:07:35 32 Male United States IL No Yes Yes Often 26-100 ... Somewhat difficult No No Some of them Yes No No Yes No NaN

144 rows × 27 columns

InĀ [16]:
# Count respondents per work_interfere answer and draw the counts as bars.
(
    health_df
    .groupby('work_interfere')
    .size()
    .plot.bar()
)
Out[16]:
<Axes: xlabel='work_interfere'>
No description has been provided for this image
InĀ [24]:
import seaborn as sns
InĀ [26]:
# Line plot of remote_work against work_interfere on the raw frame.
# NOTE(review): both columns are object dtype (see the dtypes cell), so a line
# plot may not be a meaningful view of them — confirm this chart is wanted.
sns.lineplot(
    x='work_interfere',
    y='remote_work',
    data=health_df,
)
Out[26]:
<Axes: xlabel='work_interfere', ylabel='remote_work'>
No description has been provided for this image

health_df.plot(x='work_interfere',y='remote_work', kind='scatter')

InĀ [29]:
# Summary statistics; Age is the only numeric column, so it is the only one summarised.
# NOTE(review): the output shows a negative minimum and a maximum of 1e11 for
# Age — these look like invalid entries; confirm and filter before using Age.
health_df.describe()
Out[29]:
Age
count 1.259000e+03
mean 7.942815e+07
std 2.818299e+09
min -1.726000e+03
25% 2.700000e+01
50% 3.100000e+01
75% 3.600000e+01
max 1.000000e+11
InĀ [32]:
# Re-display the work_interfere column; note the NaN entries among the answers.
health_df['work_interfere']
Out[32]:
0           Often
1          Rarely
2          Rarely
3           Often
4           Never
          ...    
1254          NaN
1255        Often
1256    Sometimes
1257          NaN
1258    Sometimes
Name: work_interfere, Length: 1259, dtype: object
InĀ [34]:
# Boolean mask of the raw frame: True wherever a value is missing.
health_df.isnull()
Out[34]:
Timestamp Age Gender Country state self_employed family_history treatment work_interfere no_employees ... leave mental_health_consequence phys_health_consequence coworkers supervisor mental_health_interview phys_health_interview mental_vs_physical obs_consequence comments
0 False False False False False True False False False False ... False False False False False False False False False True
1 False False False False False True False False False False ... False False False False False False False False False True
2 False False False False True True False False False False ... False False False False False False False False False True
3 False False False False True True False False False False ... False False False False False False False False False True
4 False False False False False True False False False False ... False False False False False False False False False True
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1254 False False False False True False False False True False ... False False False False False False False False False True
1255 False False False False False False False False False False ... False False False False False False False False False True
1256 False False False False False False False False False False ... False False False False False False False False False True
1257 False False False False False False False False True False ... False False False False False False False False False True
1258 False False False False False False False False False False ... False False False False False False False False False True

1259 rows × 27 columns

InĀ [36]:
# Number of missing values in each column.
health_df.isnull().sum(axis=0)
Out[36]:
Timestamp                       0
Age                             0
Gender                          0
Country                         0
state                         515
self_employed                  18
family_history                  0
treatment                       0
work_interfere                264
no_employees                    0
remote_work                     0
tech_company                    0
benefits                        0
care_options                    0
wellness_program                0
seek_help                       0
anonymity                       0
leave                           0
mental_health_consequence       0
phys_health_consequence         0
coworkers                       0
supervisor                      0
mental_health_interview         0
phys_health_interview           0
mental_vs_physical              0
obs_consequence                 0
comments                     1095
dtype: int64
InĀ [38]:
# Row/column count before any rows are dropped, for comparison with the cleaned frame.
health_df.shape
Out[38]:
(1259, 27)
InĀ [17]:
# Keep only respondents who answered work_interfere; the raw frame is left untouched.
cleaned_df = health_df[health_df['work_interfere'].notna()]
InĀ [42]:
# (rows, columns) left after dropping rows with a missing work_interfere answer.
cleaned_df.shape
Out[42]:
(995, 27)
InĀ [44]:
# Labeled bar chart: number of respondents per work_interfere answer.
ax = cleaned_df.groupby('work_interfere').size().plot.bar()
ax.set_title('Mental Health Interference')
ax.set_xlabel('Does Mental Health Interfere with your Work?')
ax.set_ylabel('Number of Patients')
Out[44]:
Text(0, 0.5, 'Number of Patients')
No description has been provided for this image
InĀ [29]:
# Cohort of respondents who answered 'Sometimes' to work_interfere.
sometimes_df = cleaned_df.loc[cleaned_df['work_interfere'].eq('Sometimes')]
InĀ [31]:
# Preview the first five rows of the 'Sometimes' cohort.
sometimes_df.head(n=5)
Out[31]:
Timestamp Age Gender Country state self_employed family_history treatment work_interfere no_employees ... leave mental_health_consequence phys_health_consequence coworkers supervisor mental_health_interview phys_health_interview mental_vs_physical obs_consequence comments
5 2014-08-27 11:31:22 33 Male United States TN NaN Yes No Sometimes 6-25 ... Don't know No No Yes Yes No Maybe Don't know No NaN
6 2014-08-27 11:31:50 35 Female United States MI NaN Yes Yes Sometimes 1-5 ... Somewhat difficult Maybe Maybe Some of them No No No Don't know No NaN
8 2014-08-27 11:32:39 42 Female United States IL NaN Yes Yes Sometimes 100-500 ... Very difficult Maybe No Yes Yes No Maybe No No NaN
10 2014-08-27 11:32:44 31 Male United States OH NaN No Yes Sometimes 6-25 ... Don't know No No Some of them Yes No No Don't know No NaN
12 2014-08-27 11:33:23 42 female United States CA NaN Yes Yes Sometimes 26-100 ... Somewhat difficult Yes Yes Yes Yes Maybe Maybe No Yes NaN

5 rows × 27 columns

InĀ [50]:
# (rows, columns) of the 'Sometimes' cohort.
sometimes_df.shape
Out[50]:
(465, 27)
InĀ [52]:
# Remote-work answers within the 'Sometimes' cohort, as a labeled bar chart.
ax = sometimes_df.groupby('remote_work').size().plot.bar()
ax.set_title('Sometimes Feel Mental Health Interference')
ax.set_xlabel('Do You Work from Home?')
ax.set_ylabel('Number of Patients')
Out[52]:
Text(0, 0.5, 'Number of Patients')
No description has been provided for this image
InĀ [33]:
# Cohort of respondents who answered 'Often' to work_interfere.
often_df = cleaned_df.loc[cleaned_df['work_interfere'].eq('Often')]
InĀ [56]:
# (rows, columns) of the 'Often' cohort.
often_df.shape
Out[56]:
(144, 27)
InĀ [58]:
# Remote-work answers within the 'Often' cohort, as a labeled bar chart.
ax = often_df.groupby('remote_work').size().plot.bar()
ax.set_title('Often Feel Mental Health Interference')
ax.set_xlabel('Do You Work from Home?')
ax.set_ylabel('Number of Patients')
Out[58]:
Text(0, 0.5, 'Number of Patients')
No description has been provided for this image
InĀ [35]:
# Cohort of respondents who answered 'Rarely' to work_interfere.
Rarely_df = cleaned_df.loc[cleaned_df['work_interfere'].eq('Rarely')]
InĀ [62]:
# Remote-work answers within the 'Rarely' cohort, as a labeled bar chart.
ax = Rarely_df.groupby('remote_work').size().plot.bar()
ax.set_title('Rarely Feel Mental Health Interference')
ax.set_xlabel('Do You Work from Home?')
ax.set_ylabel('Number of Patients')
Out[62]:
Text(0, 0.5, 'Number of Patients')
No description has been provided for this image
InĀ [37]:
# Cohort of respondents who answered 'Never' to work_interfere.
Never_df = cleaned_df.loc[cleaned_df['work_interfere'].eq('Never')]
InĀ [66]:
# Remote-work answers within the 'Never' cohort, as a labeled bar chart.
Never_df.groupby('remote_work').size().plot(kind='bar')
# The title must name the cohort being plotted here (Never_df), not 'Rarely'.
plt.title('Never Feel Mental Health Interference')
plt.xlabel('Do You Work from Home?')
plt.ylabel('Number of Patients')
Out[66]:
Text(0, 0.5, 'Number of Patients')
No description has been provided for this image
InĀ [89]:
# Count remote_work answers within each interference cohort.
sometimes_number = sometimes_df.groupby('remote_work').size()
often_number = often_df.groupby('remote_work').size()
rarely_number = Rarely_df.groupby('remote_work').size()

# One column per cohort; rows are the remote_work answers.
all_df = pd.DataFrame({
    'Sometimes': sometimes_number,
    'Often': often_number,
    'Rarely': rarely_number
})

# Plot the frame assigned above. This cell defines all_df, not combined_df, so
# referring to combined_df would raise NameError on a fresh kernel.
all_df.plot(kind='bar')

plt.title('Mental Health Interference by Work from Home Status')
plt.xlabel('Do You Work from Home?')
plt.ylabel('Number of Patients')

plt.legend(title='Frequency of Mental Health Interference')
Out[89]:
<matplotlib.legend.Legend at 0x1333313d0>
No description has been provided for this image
InĀ [41]:
# Count self_employed answers within each interference cohort.
sometimes_number_1, often_number_1, rarely_number_1 = (
    cohort.groupby('self_employed').size()
    for cohort in (sometimes_df, often_df, Rarely_df)
)

# One column per cohort; rows are the self_employed answers.
all_df = pd.DataFrame(dict(
    Sometimes=sometimes_number_1,
    Often=often_number_1,
    Rarely=rarely_number_1,
))

# Grouped bars with title, axis labels and a titled legend.
ax = all_df.plot.bar()
ax.set_title('Mental Health Interference by Work from Self Employed')
ax.set_xlabel('Are you self Employed?')
ax.set_ylabel('Number of Patients')
ax.legend(title='Frequency of Mental Health Interference')
Out[41]:
<matplotlib.legend.Legend at 0x12e2f0170>
No description has been provided for this image
InĀ [47]:
# Count tech_company answers within each interference cohort.
sometimes_number, often_number, rarely_number = (
    cohort.groupby('tech_company').size()
    for cohort in (sometimes_df, often_df, Rarely_df)
)

# One column per cohort; rows are the tech_company answers.
all_df = pd.DataFrame(dict(
    Sometimes=sometimes_number,
    Often=often_number,
    Rarely=rarely_number,
))

# Grouped bars with title, axis labels and a titled legend.
ax = all_df.plot.bar()
ax.set_title('Mental Health Interference by Work for Tech Company')
ax.set_xlabel('Is your employer primarily a tech company/organization?')
ax.set_ylabel('Number of Patients')
ax.legend(title='Frequency of Mental Health Interference')
Out[47]:
<matplotlib.legend.Legend at 0x12e578320>
No description has been provided for this image