Last active
February 8, 2020 12:27
-
-
Save arakaki-asdf/4c14e0570b5bfc6ccc0794bca4be910e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from __future__ import annotations | |
from typing import List | |
from typing import Any | |
import csv | |
import json | |
import os | |
import pandas as pd | |
"""readme | |
Requires Python 3.7 or later
pip install pandas
pip install mypy
VS Code settings.json { "python.linting.mypyEnabled": true }
""" | |
class CsvValidator:
    """Validate selected columns of a CSV file against rules loaded from JSON.

    The rules file is a JSON object of the following shape
    (``*`` = required, ``_`` = optional)::

        {
            "path": * path of the CSV file to check
            "validates": * list of rule objects
            [
                {
                    "error": * message reported when the rule is violated
                    "columns": * CSV columns the rule refers to
                    [
                    ]
                    "groupby": _ group the rows by the values of "key"
                    {
                        "key": * column to group by
                        "duplicate": _ column whose values must be unique
                                       within each group
                    }
                    "query": _ pandas query expression; every row it
                               matches is reported as a violation
                }
            ]
        }

    Every problem found is printed and counted in ``error_count``; the
    counter is never reset, so it accumulates across calls on one instance.

    pandas basics: https://qiita.com/ysdyt/items/9ccca82fc5b504e7913a
    """

    SUCCESS: int = 0
    FAILED: int = 1

    def __init__(self) -> None:
        # Number of errors reported through error_log() so far.
        self.error_count: int = 0
        # Column names of the most recently loaded CSV file.
        self.csv_columns: List[Any] = []

    def error_log(self, message: str) -> None:
        """Print ``message`` as an error and increment ``error_count``."""
        print(f"error: {message}")
        self.error_count += 1

    def exist_column(self, column: str) -> bool:
        """Return True if ``column`` is a CSV column; otherwise log and return False."""
        if column not in self.csv_columns:
            self.error_log(f"csvのカラムに{column}は存在しません")
            return False
        return True

    def exist_columns(self, columns: List[str]) -> bool:
        """Return True if every name in ``columns`` is a CSV column.

        All missing names are reported together in a single error.
        """
        missing = [name for name in columns if name not in self.csv_columns]
        if missing:
            self.error_log(f"csvのカラムに{missing}は存在しません")
            return False
        return True

    def read_csv(self, path: str, validates: List[Any]) -> None:
        """Load the CSV file and apply every rule to it.

        Args:
            path: Path of the CSV file.
            validates: Rule objects taken from the JSON file.
        """
        try:
            table: Any = pd.read_csv(path, sep=',')
        except (OSError, ValueError):
            # OSError covers unreadable/missing files. pandas signals an empty
            # or malformed file with EmptyDataError / ParserError, which are
            # ValueError subclasses (as is UnicodeDecodeError).
            self.error_log("csv読み込みエラー")
            return
        # Keep a plain list so membership tests behave like the annotation says.
        self.csv_columns = list(table.columns)
        for validate in validates:
            self.parse_validate(table, validate)

    def parse_validate(self, table: Any, validate: Any) -> None:
        """Check one rule object and dispatch to the checks it requests.

        Args:
            table: The loaded CSV data.
            validate: One rule object from the JSON file.
        """
        # A missing key and an empty value are both rejected here.
        if not validate.get('columns'):
            self.error_log("columnsが使用されていません")
            return
        if not validate.get('error'):
            self.error_log("errorが使用されていません")
            return
        columns: List[str] = validate['columns']
        comment: str = validate['error']
        if not self.exist_columns(columns):
            return
        if validate.get('query'):
            self.parse_query(table, validate, columns, comment)
        if validate.get('groupby'):
            if not validate['groupby'].get('key'):
                self.error_log("groupbyにkeyが指定されていません")
                return
            self.parse_groupby(table, validate, columns, comment)

    def parse_query(self, table: Any, validate: Any, columns: List[str], comment: str) -> None:
        """Run the rule's query on the listed columns; any hit is an error.

        Args:
            table: The loaded CSV data.
            validate: One rule object from the JSON file.
            columns: Columns the query is evaluated against.
            comment: Error message to report when rows match.
        """
        query = validate['query']
        res = table[columns].query(query)
        if len(res) != 0:
            self.error_log(comment)
            print(res)

    def parse_groupby(self, table: Any, validate: Any, columns: List[str], comment: str) -> None:
        """Group the rows by the rule's key and run the per-group checks.

        Args:
            table: The loaded CSV data.
            validate: One rule object from the JSON file.
            columns: Columns related to the rule (not used by this check).
            comment: Error message to report when a check fails.
        """
        groupby = validate['groupby']
        key = groupby['key']
        # Grouping by an unknown column would raise KeyError inside pandas,
        # so report it like every other missing column instead.
        if isinstance(key, (list, tuple)):
            key_exists = self.exist_columns(list(key))
        else:
            key_exists = self.exist_column(key)
        if not key_exists:
            return
        # The duplicate column does not depend on the group: resolve and
        # verify it once, before iterating.
        duplicate_key = groupby.get('duplicate')
        if not duplicate_key:
            return
        if not self.exist_column(duplicate_key):
            return
        for _, group in table.groupby(key):
            self.parse_duplicate(group, duplicate_key, comment)

    def parse_duplicate(self, table: Any, duplicate_key: str, comment: str) -> None:
        """Report rows whose ``duplicate_key`` value repeats an earlier row.

        Args:
            table: Rows to check (one group of the CSV data).
            duplicate_key: Column whose values must be unique.
            comment: Error message to report when duplicates exist.
        """
        res = table[table[duplicate_key].duplicated()]
        if len(res) != 0:
            self.error_log(comment)
            print(res)

    def run(self, path: str) -> int:
        """Load the JSON rules file and validate the CSV it points to.

        Args:
            path: Path of the JSON rules file.

        Returns:
            ``SUCCESS`` (0) when ``error_count`` is zero afterwards,
            ``FAILED`` (1) otherwise.
        """
        if not os.path.exists(path):
            self.error_log(f"{path}: パスが存在しません")
            return self.FAILED
        with open(path, 'r', encoding='UTF-8') as f:
            print(f"### {path} ###")
            try:
                root = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError):
                self.error_log("json decode error.")
                return self.FAILED
        # A JSON root that is not an object (list, string, number) cannot
        # carry "path"; treat it as the key being absent.
        if not isinstance(root, dict) or not root.get('path'):
            self.error_log("pathが指定されていません")
            return self.FAILED
        if not root.get('validates'):
            self.error_log("validatesが指定されていません")
            return self.FAILED
        self.read_csv(root["path"], root["validates"])
        if self.error_count > 0:
            return self.FAILED
        return self.SUCCESS
# Script body: validate the CSV described by test.json in the working directory.
# The status code returned by run() is not used here.
validator = CsvValidator()
validator.run(path='test.json')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment