Skip to content

Instantly share code, notes, and snippets.

@riaf
Created September 20, 2024 01:25
Show Gist options
  • Save riaf/7d947a8081c7c6179f55de3d2c4d0f43 to your computer and use it in GitHub Desktop.
Save riaf/7d947a8081c7c6179f55de3d2c4d0f43 to your computer and use it in GitHub Desktop.
import pandas as pd
# Input CSV, read from the current working directory.
# NOTE(review): the file name suggests a nationwide corporate-number dump dated
# 2024-08-30 -- confirm the actual source with the author.
csv_file = '00_zenkoku_all_20240830.csv'

# The file carries no header row (header=None), so names are assigned below.
# low_memory=False parses the file in one pass so every column gets a single
# inferred dtype instead of chunk-by-chunk guesses.
df = pd.read_csv(csv_file, header=None, encoding='utf-8', low_memory=False)

# Positional column labels; pandas raises ValueError here if the file does not
# have exactly this many columns.
# NOTE(review): the published layout for this kind of file appears to place an
# address-image-id column right after 'street_number' and has no spare column
# after 'change_cause', so the labels from 'prefecture_code' through
# 'extra_column' may each be shifted by one -- verify against the official
# spec. The columns this script actually reads ('process', 'city_name',
# 'assignment_date') are not affected by that possible shift.
df.columns = [
    'sequence_number', 'corporate_number', 'process', 'correct', 'update_date',
    'change_date', 'name', 'name_image_id', 'kind', 'prefecture_name', 'city_name',
    'street_number', 'prefecture_code', 'city_code', 'postcode', 'address_outside',
    'address_outside_image_id', 'close_date', 'close_cause', 'successor_corporate_number',
    'change_cause', 'extra_column', 'assignment_date', 'latest', 'name_en', 'prefecture_name_en', 'city_name_en',
    'address_outside_en', 'furigana', 'hihyoji',
]

# Keep only rows whose process code equals 1 (presumably "new registration" --
# verify against the data spec).
# .copy() makes the subset an independent frame: columns are added to it
# afterwards, and writing into a boolean-mask slice of `df` is the pattern that
# triggers pandas' SettingWithCopyWarning.
new_establishments = df[df['process'] == 1].copy()

# Parse the assignment date; values that cannot be parsed become NaT
# (errors='coerce') instead of raising.
new_establishments['assignment_date'] = pd.to_datetime(
    new_establishments['assignment_date'], errors='coerce'
)
def calculate_fiscal_year(date, *, start_month: int = 4) -> "int | None":
    """Return the fiscal year that *date* belongs to.

    A fiscal year is labelled by the calendar year in which it starts: with
    the default ``start_month=4``, 2024-04-01 through 2025-03-31 all map to
    2024.

    Args:
        date: Any object exposing ``.year`` and ``.month`` (e.g. a
            ``pandas.Timestamp``), or a missing value (``None``/``NaT``/NaN).
        start_month: Month number (1-12) in which the fiscal year begins.
            Defaults to 4 (April), the value this function previously
            hard-coded.

    Returns:
        The fiscal year as an ``int``, or ``None`` when *date* is missing.
    """
    # Missing dates carry no fiscal year; callers drop these rows afterwards.
    if pd.isnull(date):
        return None
    # Months before the start month still belong to the previous fiscal year.
    return date.year if date.month >= start_month else date.year - 1
# Label every row with the fiscal year of its assignment date. Rows whose date
# is NaT get a missing value from calculate_fiscal_year.
fiscal_years = new_establishments['assignment_date'].apply(calculate_fiscal_year)

# .assign() returns a new frame, so nothing is written into a frame that may
# itself be a slice of `df`. Rows without a fiscal year are dropped, and the
# column is then cast to int: when any date was missing, pandas stores the
# applied results as floats, which would otherwise be exported as "2023.0"
# rather than "2023".
new_establishments = (
    new_establishments
    .assign(fiscal_year=fiscal_years)
    .dropna(subset=['fiscal_year'])
    .astype({'fiscal_year': int})
)

# Count rows per (city_name, fiscal_year) pair.
# NOTE(review): the key uses city_name alone, so identically named
# municipalities in different prefectures are counted together -- confirm
# whether the prefecture should be part of the key.
result = (
    new_establishments
    .groupby(['city_name', 'fiscal_year'])
    .size()
    .reset_index(name='count')
)

print(result)

# Write the counts without the index column.
result.to_csv('new_establishments_by_city_fiscal_year.csv', index=False, encoding='utf-8')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment