In [ ]:
 
In [1]:
import urllib.request, json
import pandas as pd

all_jobs = []
def get_data(i):
    url = "https://www.amazon.jobs/en/search.json?radius=24km&facets[]=location&facets[]=business_category&facets[]=category&facets[]=schedule_type_id&facets[]=employee_class&facets[]=normalized_location&facets[]=job_function_id&offset={}&result_limit=10&sort=relevant&latitude=&longitude=&loc_group_id=&loc_query=&base_query=data%20scientist&city=&country=&region=&county=&query_options=&".format(i)
    with urllib.request.urlopen(url) as url:
        data = json.loads(url.read().decode())
        return data

def add_data_to_df(data):
    for job in data['jobs']:
        all_jobs.append(job)

def do_the_thing():
    i = 10
#     while i < 21: for testing lololol
    while i < 3071:
        data = get_data(i)
        add_data_to_df(data)
        i += 10

do_the_thing()        
df = pd.DataFrame(all_jobs)
/Users/kendraryan/.pyenv/versions/3.7.3/lib/python3.7/site-packages/pandas/compat/__init__.py:117: UserWarning: Could not import the lzma module. Your installed Python is incomplete. Attempting to use lzma compression will result in a RuntimeError.
  warnings.warn(msg)
In [2]:
def get_label(team):
    try:
        return team['label']
    except:
        return 'no label'
df['team-label'] = df.apply(lambda x: get_label(x['team']), axis=1)
df.to_csv('aws_jobs_20210316.csv')

Remove anything outside the US

In [5]:
print('BEFORE', len(df))
df_us = df[df['country_code'] == 'US']
print('AFTER', len(df_us))
BEFORE 3070
AFTER 2511

Keep only jobs that include "data science" or "data scientist" in the title

In [49]:
df = df_us.copy()

def get_ds(title):
    if 'data scien' in title.lower():
        return True
    else: 
        return False
    
df['is_ds'] = df.apply(lambda x: get_ds(x['title']), axis=1)
df_ds = df[df['is_ds'] == True]
In [50]:
print('BEFORE', len(df_us))
print('AFTER', len(df_ds))
BEFORE 2511
AFTER 642

Remove "Senior" or "Sr" jobs

In [70]:
df = df_ds.copy()

def no_sr(title):
    if 'sr.' in title.lower() or 'senior' in title.lower() or 'sr ' in title.lower():
        return True
    else: 
        return False
    
df['is_sr'] = df.apply(lambda x: no_sr(x['title']), axis=1)
df_sr = df[df['is_sr'] == False]
In [71]:
print('BEFORE', len(df_ds))
print('AFTER', len(df_sr))
BEFORE 642
AFTER 350

Remove "Principal" or "Manager"

In [72]:
df = df_sr.copy()

def no_sr(title):
    if 'principal' in title.lower() or 'manager' in title.lower():
        return True
    else: 
        return False
    
df['is_pm'] = df.apply(lambda x: no_sr(x['title']), axis=1)
df_pm = df[df['is_pm'] == False]
In [73]:
print('BEFORE', len(df_sr))
print('AFTER', len(df_pm))
BEFORE 350
AFTER 226

How many of these 226 have unique "basic qualifications"?

In [75]:
df = df_pm.copy()
len(df['basic_qualifications'].value_counts())
Out[75]:
88

How many of these 226 have the same job title?

In [76]:
len(df['title'].value_counts())
Out[76]:
48

What's the job distribution for business category?

In [79]:
df['business_category'].value_counts()
Out[79]:
aws                                   148
advertising                             8
alexa                                   7
amazondevices                           7
finance                                 7
operations                              6
customer-trust-and-partner-support      6
global-corporate                        5
seller-services                         5
fulfillment-and-operations              4
digital-entertainment                   3
transportation-and-logistics            3
ecp                                     2
subsidiaries                            2
business-and-corporate-development      2
customer-service                        2
retail                                  2
primevideo                              2
studentprograms                         1
consumerpayments                        1
fulfillment-ops                         1
amazongo                                1
consumer_engagement                     1
Name: business_category, dtype: int64
In [85]:
df['business_category'].value_counts().plot(kind = 'barh')
Out[85]:
<AxesSubplot:>
In [86]:
len(df['team-label'].value_counts())
Out[86]:
46
In [87]:
df['title'].value_counts()
Out[87]:
Data Scientist - ProServe                                    61
Data Scientist - Nationwide Opportunities                    52
Data Scientist                                               43
Data Scientist -ProServe                                      7
Data Scientist II                                             6
Data Scientist                                                6
Data Scientist, YETI                                          3
Data Scientist III                                            3
Renewable Optimization Data Scientist                         3
Data Scientist I                                              2
Data Scientist II #0000                                       2
Data Scientist, Advertising                                   2
Data Scientist, Public Sector - AWS Professional Services     1
Data Scientist - Alexa Shopping                               1
Data Scientist – Failure Analysis                             1
Data Scientist - Fraud Prevention Team                        1
Data Scientist, Content Strategy                              1
Data Scientist, Community Operations                          1
Data Scientist, ML Service                                    1
Data Scientist, AWS Security- Baltimore                       1
ES Tech, Data Scientist - Forecasting                         1
Data Scientist- Marketing Analytics                           1
Data Scientist II, Funnel Insights                            1
Data Science II                                               1
Data Scientist - Prime Gaming                                 1
Data Scientist, AWS Partner Network                           1
Data Science Intern                                           1
Data Scientist, Customer Trust                                1
Data Scientist - Prime Video Efficiency Platform              1
Marketing Analytics Data Scientist                            1
Data Scientist, Simulation                                    1
Data Scientist - Device Economics                             1
Data Scientist, ProServe                                      1
Data Scientist - Sponsored Brands Recommendations             1
Data Scientist - Nationwide ProServe                          1
 Computer Vision Data Scientist                               1
Data Scientist, Alexa                                         1
Data scientist                                                1
Data Scientist, Transportation Execution                      1
Data Engineer - Fire TV Data Science & Analytics*             1
Data Scientist, WW Consumer Controllership Data Science       1
Data Scientist, Marketing                                     1
Data Scientist II - AWS Region Services (RBE Labs)            1
Data Scientist II - AMZ5008                                   1
Data Scientist - Demand Forecasting                           1
HCLS Data Scientist                                           1
Data Scientist- Amazon Business Payments                      1
Data Scientist - ProServe                                     1
Name: title, dtype: int64
In [88]:
len(df['title'].value_counts())
Out[88]:
48

NOTE: It appears that there are some simple typo errprs -- I have a feeling that

  • "Data Scientist - ProServe"
  • "Data Scientist -ProServe"
  • "Data Scientist, ProServe"

Are all actually the same job

In [39]:
# df = df_sr.copy()
# df_grouped = pd.DataFrame(df.groupby('basic_qualifications')['title'])
In [40]:
df
Out[40]:
basic_qualifications business_category city company_name country_code description description_short display_distance id id_icims ... source_system state title university_job updated_time url_next_step team team-label is_ds is_sr
0 · Bachelor's Degree<br/>· 1+ years of experien... alexa Bellevue Amazon.com Services LLC US The primary mission of ADECT Monitoring team i... The primary mission of ADECT Monitoring team i... None b05c630f-ac18-4412-abe2-52b53068dda8 1446680 ... Art WA Data Scientist None 21 days https://account.amazon.jobs/jobs/1446680/apply {'id': None, 'business_category_id': None, 'id... team-alexa-skills-voice-games True False
1 · Masters Degree in Statistics/Applied Mathema... fulfillment-and-operations Seattle Amazon.com Services LLC US The vision of Workforce Intelligence is to des... The vision of Workforce Intelligence is to des... None bc59d091-3c59-4272-85bf-bac8b7d8f524 1453488 ... Art WA Data Scientist- Marketing Analytics None 15 days https://account.amazon.jobs/jobs/1453488/apply {'id': None, 'business_category_id': None, 'id... team-workforce-staffing True False
3 · Bachelor's Degree<br/>· 3+ years of experien... finance Arlington Amazon.com Services LLC US Are you passionate about solving complex busin... Are you passionate about solving complex busin... None 86692d3f-8772-44a5-85c5-c60599129dbf 1385007 ... Art VA Data Scientist None 28 days https://account.amazon.jobs/jobs/1385007/apply {'id': None, 'business_category_id': None, 'id... team-fintech True False
4 · Master's degree in Mathematics, Statistics o... aws Seattle Amazon Dev Center U.S., Inc. US Would you like to help us build the next-gener... Would you like to help us build the next-gener... None 5a1d0b02-15ab-45f3-89fd-0a463a1260ab 1424688 ... Art WA Data Scientist None 18 days https://account.amazon.jobs/jobs/1424688/apply {'id': None, 'business_category_id': None, 'id... team-data-science-primary True False
6 · M.S. in Computer Science, Machine Learning, ... aws Seattle Amazon Web Services, Inc. US Do you want to be part of an innovative and ra... Do you want to be part of an innovative and ra... None efc0dc97-2f82-423e-abed-f8646820b52e 1460674 ... Art WA Data Scientist None 12 days https://account.amazon.jobs/jobs/1460674/apply {'id': None, 'business_category_id': None, 'id... team-data-science-primary True False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1108 · 3+ years of experience as a Data Engineer or... amazondevices Seattle Amazon.com Services LLC US Are you excited about applying your data engin... Are you excited about applying your data engin... None 9dddcbcc-4ed7-4d69-90ce-9e5d146ebba0 1324823 ... Art WA Data Engineer - Fire TV Data Science & Analytics* None 5 months https://account.amazon.jobs/jobs/1324823/apply {'id': None, 'business_category_id': None, 'id... no-team-listed True False
1319 · Master’s degree in quantitative field such a... aws Arlington Amazon Dev Center U.S., Inc. US Do you like helping U.S. Intelligence Communit... Do you like helping U.S. Intelligence Communit... None 99ea1a90-c2f3-4e8b-b478-64fc4c215944 1327991 ... Art VA Sr Manager, Data Science - AWS Regions None 21 days https://account.amazon.jobs/jobs/1327991/apply {'id': None, 'business_category_id': None, 'id... team-us-security-clearance-jobs-primary True False
1628 · M.S in Operations Research, Industrial Engin... finance Seattle Amazon.com Services LLC US Are you seeking an environment where you can d... Are you seeking an environment where you can d... None 05b530d9-5380-4f00-b5f6-9cf6f5177af2 1423205 ... Art WA Research Science Manager, WW Consumer Controll... None 19 days https://account.amazon.jobs/jobs/1423205/apply {'id': None, 'business_category_id': None, 'id... team-fulfillment-operations True False
1683 · 7+ years of experience working directly with... operations Westborough Amazon.com Services LLC US Are you inspired by invention? Is problem solv... Are you inspired by invention? Is problem solv... None 8031f1ac-144c-4028-9762-ba15819f730d 1447710 ... Art MA Technical Program Manager - Amazon Robotics Da... None 26 days https://account.amazon.jobs/jobs/1447710/apply {'id': None, 'business_category_id': None, 'id... team-ww-ops-robotics True False
1707 · A Master’s degree in Computer Science, Compu... retail Seattle Amazon.com Services LLC US Take Earth's most customer-centric company. Mi... Take Earth's most customer-centric company. Mi... None 924d9007-de24-4c4b-ba8b-2b473a8c2a05 1352175 ... Art WA Applied Science Manager - Personalization, Rec... None 18 days https://account.amazon.jobs/jobs/1352175/apply {'id': None, 'business_category_id': None, 'id... team-personalization-and-recommendations True False

356 rows × 30 columns

In [ ]:
 
In [41]:
import numpy as np
table = pd.pivot_table(df, values=['business_category'], index=['title', 'basic_qualifications'],
                    aggfunc={'business_category': np.sum})
In [42]:
len(table)
Out[42]:
120
In [43]:
table
Out[43]:
business_category
title basic_qualifications
Computer Vision Data Scientist · <br/> · Master or PhD in computer vision/machine learning or related experience.<br/> · 3+ years of relevant experience in building production-scale system/algorithm in one of the following domains: computer vision, deep learning, or machine learning.<br/> · Coding skills in one or more programming languages such as Python, Scala, Java, C, C+<br/> · 2-3 years of modeling experience working with deep learning frameworks like Pytorch or MxNet.<br/> · Current hands-on experience with state-of-the-art object detection approaches (e.g. Faster RCNN, YOLO, CenterNet etc.)<br/> · Understanding of deep learning CV evaluation metrics including mAP, F_beta, PR curves, etc.<br/><br/> aws
Applied Science Manager - Personalization, Recommendation Systems, Data Science, Applied Science, Machine Learning · A Master’s degree in Computer Science, Computer Engineering, Mathematics, Statistics, or a related technical field; or equivalent combination of technical education and work experience.<br/>· 4+ years of experience in Applied Machine Learning, Statistics, or a closely-related field.<br/>· 1+ years of experience managing a software engineering or machine learning science team.<br/>· Must have delivered features for at least one large-scale production system. retail
Data Engineer - Fire TV Data Science & Analytics* · 3+ years of experience as a Data Engineer or in a similar role<br/>· Experience with data modeling, data warehousing, and building ETL pipelines<br/>· Experience in SQL<br/>· A desire to work in a collaborative, intellectually curious environment.<br/>· Bachelor's degree in computer science, engineering, mathematics, or a related technical discipline<br/>· 3+ years of industry experience in software development, data engineering, business intelligence, data science, or related field with a track record of manipulating, processing, and extracting value from large datasets<br/>· Highly-proficient with SQL and query performance tuning<br/>· Experience using big data technologies (Hadoop, Hive, Hbase, Spark, EMR, Redshift Spectrum, Athena, etc.)<br/>· Knowledge of data management fundamentals and data storage principles<br/>· Knowledge of distributed systems as it pertains to data storage and computing amazondevices
Data Science II · Masters 2 years of experience or a Bachelor’s degree in Statistics, Applied Math, Operations Research, Economics, Engineering or a related quantitative field with five years of working experience as a Data Scientist<br/>· Experience with statistical analysis, data modeling, machine learning, optimizations, regression modeling and forecasting, time series analysis, data mining, and demand modeling<br/>· Experience applying various machine learning techniques, and understanding the key parameters that affect their performance<br/>· Excellent written and verbal communication skills. Strong ability to interact, communicate, present, and influence within multiple levels of the organization.<br/>· Experience with Predictive analytics (e.g., forecasting, time-series, neural networks) and Prescriptive analytics (e.g., stochastic optimization, bandits, reinforcement learning)<br/>· Experience in Statistical Software such as R, Weka, SAS, SPSS<br/>· Proficiency with TABLEAU or other web based interfaces to create graphic-rich customizable plots, charts data maps etc<br/>· Able to write SQL scripts for analysis and reporting (Redshift, SQL, MySQL)<br/>· Experience using one or more Python, R, Java, C++ VBA, MATLAB, programming languages<br/>· Experience processing, filtering, and presenting large quantities (100K to Millions of rows) of data transportation-and-logistics
Data Science Intern · Enrolled in Master’s or Ph.D. degree in math, statistics, computer science, or related science field.<br/>· Experience with regression modeling, forecasting, and time series analysis.<br/>· Experience with data scripting languages (e.g. SQL, Python, R etc.) or statistical/mathematical software (e.g. R, SAS, or Matlab)<br/>· Experience using one or more programming languages (e.g., Python, Java, C++, C, etc.).<br/>· Experience with big data: processing, filtering, and presenting large quantities (100K to Millions of rows) of data.<br/> studentprograms
... ... ...
Sr Data Scientist, Selling Partner Paid Services · Bachelor’s degree or foreign equivalent in Statistics, Applied Math, Operations Research, Economics, or a related field<br/>· 5 years of years of relevant work experience in data science or related field, and 7-10 years of professional experience (experience in consumer-facing industry preferred)<br/>· Must have two years of experience in the following skills: building statistical models and machine learning models using large datasets from multiple resources<br/>· Experience using database technologies including SQL, ETL, Oracle, or SPSS<br/>· Expertise applying specialized modelling software including SAS, R, Python, Matlab, or Stata seller-servicesseller-services
Sr Manager, Data Science · PhD or equivalent Master's Degree plus 10+ years of experience in a quantitative field.<br/>· 5+ years of people management experience<br/>· Strong analytical skills.<br/>· 10+ years of experience of building predictive models for business and proficiency in model development and model validation.<br/>· Experience managing data pipelines for data ingestion<br/>· Experience working with software development teams and taking models to production<br/>· Experience managing business stakeholders across organizations<br/>· Strong communication skills amazondevices
Sr Manager, Data Science - AWS Regions · Master’s degree in quantitative field such as Computer Science, Statistics, Mathematics, etc.<br/>· 7+ years of relevant senior level experience in Data Science/Business Intelligence/ Analytics<br/>· 5+ years of team management experience with a track record of hiring & developing analytics professionals<br/>· 4+ years industry experience in applying Computer Science, Computer Engineering, Machine Learning, Statistics or related technical discipline<br/>· Current, active US Government Security Clearance of TS/SCI with Polygraph<br/><br/> aws
Technical Product Manager Data Science III · 2+ years of experience with end to end product delivery<br/>· 5+ years of technical product management, program management or engineering experience<br/>· Bachelor's Degree<br/>· Experience contributing to engineering discussions around technology decisions and strategy related to a product<br/>· Experience owning roadmap strategy and definition<br/>· Experience owning feature delivery and tradeoffs of a product<br/>· 4+ years of experience with data scripting languages (e.g SQL, Python, R etc.) or statistical/mathematical software (e.g. R, SAS, or Matlab) consumer_engagement
Technical Program Manager - Amazon Robotics Data Science, Business Intelligence & Data Engineering · 7+ years of experience working directly with engineering teams<br/>· Experience managing programs across cross functional teams, building processes and coordinating release schedules<br/>· Experience driving roadmap strategy and definition<br/>· 5+ years of technical program management experience<br/>-Experience coordinating between multiple project stakeholders, technical program managers, and software development teams.<br/>· Hands-on experience with agile methodologies.<br/><br/><br/><br/><br/> operations

120 rows × 1 columns

In [45]:
len(df)
Out[45]:
356
In [47]:
len(df['title'].value_counts())
Out[47]:
69
In [ ]: