CREATE TABLE surveys
(
`response_id` Int64,
`development_activity` Enum8('I am a developer by profession' = 1, 'I am a student who is learning to code' = 2, 'I am not primarily a developer, but I write code sometimes as part of my work' = 3, 'I code primarily as a hobby' = 4, 'I used to be a developer by profession, but no longer am' = 5, 'None of these' = 6, 'NA' = 7),
`employment` Enum8('Independent contractor, freelancer, or self-employed' = 1, 'Student, full-time' = 2, 'Employed full-time' = 3, 'Student, part-time' = 4, 'I prefer not to say' = 5, 'Employed part-time' = 6, 'Not employed, but looking for work' = 7, 'Retired' = 8, 'Not employed, and not looking for work' = 9, 'NA' = 10),
`country` LowCardinality(String),
`us_state` LowCardinality(String),
`uk_county` LowCardinality(String),
`education_level` Enum8('Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)' = 1, 'Bachelor’s degree (B.A., B.S., B.Eng., etc.)' = 2, 'Master’s degree (M.A., M.S., M.Eng., MBA, etc.)' = 3, 'Other doctoral degree (Ph.D., Ed.D., etc.)' = 4, 'Some college/university study without earning a degree' = 5, 'Something else' = 6, 'Professional degree (JD, MD, etc.)' = 7, 'Primary/elementary school' = 8, 'Associate degree (A.A., A.S., etc.)' = 9, 'NA' = 10),
`age_started_to_code` Enum8('Younger than 5 years' = 1, '5 - 10 years' = 2, '11 - 17 years' = 3, '18 - 24 years' = 4, '25 - 34 years' = 5, '35 - 44 years' = 6, '45 - 54 years' = 7, '55 - 64 years' = 8, 'Older than 64 years' = 9, 'NA' = 10),
`how_learned_to_code` Enum8('Coding Bootcamp' = 1, 'Other online resources (ex: videos, blogs, etc)' = 2, 'School' = 3, 'Online Forum' = 4, 'Friend or family member' = 5, 'Online Courses or Certification' = 6, 'Other (please specify):' = 7, 'Colleague' = 8, 'Books / Physical media' = 9, 'NA' = 10),
`years_coding` Nullable(UInt8),
`years_as_a_professional_developer` Nullable(UInt8),
`developer_type` String,
`organization_size` Enum8('Just me - I am a freelancer, sole proprietor, etc.' = 1, '2 to 9 employees' = 2, '10 to 19 employees' = 3, '20 to 99 employees' = 4, '100 to 499 employees' = 5, '500 to 999 employees' = 6, '1,000 to 4,999 employees' = 7, '5,000 to 9,999 employees' = 8, '10,000 or more employees' = 9, 'I don’t know' = 10, 'NA' = 11),
`compensation_total` Nullable(UInt64),
`compensation_frequency` Enum8('Weekly' = 1, 'Monthly' = 2, 'Yearly' = 3, 'NA' = 4),
`language_have_worked_with` String,
`language_want_to_work_with` String,
`database_have_worked_with` String,
`database_want_to_work_with` String,
`platform_have_worked_with` String,
`platform_want_to_work_with` String,
`web_framework_have_worked_with` String,
`web_framework_want_to_work` String,
`other_tech_have_worked_with` String,
`other_tech_want_to_work` String,
`infrastructure_tools_have_worked_with` String,
`infrastructure_tools_want_to_work_with` String,
`developer_tools_have_worked_with` String,
`developer_tools_want_to_work_with` String,
`operating_system` Enum8('MacOS' = 1, 'Windows' = 2, 'Linux-based' = 3, 'BSD' = 4, 'Other (please specify):' = 5, 'Windows Subsystem for Linux (WSL)' = 6, 'NA' = 7),
`frequency_visit_stackoverflow` Enum8('Multiple times per day' = 1, 'Daily or almost daily' = 2, 'A few times per week' = 3, 'A few times per month or weekly' = 4, 'Less than once per month or monthly' = 5, 'NA' = 6),
`has_stackoverflow_account` Enum8('Yes' = 1, 'No' = 2, 'Not sure/can\'t remember' = 3, 'NA' = 4),
`frequency_use_in_stackoverflow` Enum8('Multiple times per day' = 1, 'Daily or almost daily' = 2, 'A few times per week' = 3, 'A few times per month or weekly' = 4, 'Less than once per month or monthly' = 5, 'I have never participated in Q&A on Stack Overflow' = 6, 'NA' = 7),
`consider_self_active_community_member` Enum8('Yes, definitely' = 1, 'Neutral' = 2, 'Yes, somewhat' = 3, 'No, not at all' = 4, 'No, not really' = 5, 'NA' = 6, 'Not sure' = 7),
`member_other_communities` Enum8('Yes' = 1, 'No' = 2, 'NA' = 4),
`age` Enum8('Under 18 years old' = 1, '18-24 years old' = 2, '25-34 years old' = 3, '35-44 years old' = 4, '45-54 years old' = 5, '55-64 years old' = 6, '65 years or older' = 7, 'NA' = 8, 'Prefer not to say' = 9),
`annual_salary` Nullable(UInt64)) ENGINE = MergeTree ORDER BY tuple()
INSERT INTO surveys SELECT
ResponseId as response_id,
MainBranch as development_activity,
Employment as employment,
Country as country,
US_State as us_state,
UK_Country as uk_country,
EdLevel as education_level,
Age1stCode as age_started_to_code,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(LearnCode))) as how_learned_to_code,
multiIf(YearsCode = 'Less than 1 year', 0, YearsCode = 'More than 50 years', 51, toUInt8OrNull(YearsCode)) as years_coding,
multiIf(YearsCodePro = 'Less than 1 year', 0, YearsCodePro = 'More than 50 years', 51, toUInt8OrNull(YearsCodePro)) as years_as_a_professional_developer,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(DevType))) AS developer_type,
OrgSize as organization_size,
CompTotal as compensation_total,
CompFreq as compensation_frequency,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(LanguageHaveWorkedWith))) AS language_have_worked_with,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(LanguageWantToWorkWith))) AS language_want_to_work_with,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(DatabaseHaveWorkedWith))) AS database_have_worked_with,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(DatabaseWantToWorkWith))) AS database_want_to_work_with,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(PlatformHaveWorkedWith))) AS platform_have_worked_with,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(PlatformWantToWorkWith))) AS platform_want_to_work_with,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(WebframeHaveWorkedWith))) AS web_framework_have_worked_with,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(WebframeWantToWorkWith))) AS web_framework_want_to_work,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(MiscTechHaveWorkedWith))) AS other_tech_have_worked_with,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(MiscTechWantToWorkWith))) AS other_tech_want_to_work,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(ToolsTechHaveWorkedWith))) AS infrastructure_tools_have_worked_with,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(ToolsTechWantToWorkWith))) AS infrastructure_tools_want_to_work_with,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(NEWCollabToolsHaveWorkedWith))) AS developer_tools_have_worked_with,
arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(NEWCollabToolsWantToWorkWith))) AS developer_tools_want_to_work_with,
OpSys as operating_system,
SOVisitFreq as frequency_visit_stackoverflow,
SOAccount as has_stackoverflow_account,
SOPartFreq as frequency_use_in_stackoverflow,
SOComm as consider_self_active_community_member,
NEWOtherComms as member_other_communities,
Age as age,
ConvertedCompYearly as annual_salary
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/surveys/2021/survey_results_public.csv')
Created
February 21, 2024 16:50
-
-
Save gingerwizard/d3b32ed801973498e87145ed0c6e4bdb to your computer and use it in GitHub Desktop.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment