pochemuto · March 21, 2018 10:10
diff --git a/ypp-fix.py b/ypp-fix.py
 #coding: utf
 from mutagen.mp3 import MP3
 from mutagen.easyid3 import EasyID3
 from mutagen.id3 import ID3, TIT2, TIT3, TDES, TDRL, TimeStampTextFrame, ID3TimeStamp
 from mutagen.id3._util import ID3NoHeaderError
 from os import path
 from datetime import datetime
 import os
 import re
 import shutil
 from datetime import timedelta


 class NotFound(Exception):
     """Raised when a required file cannot be located.

     ``get_file_name`` raises it when a podcast folder holds no ``.mp3``
     file; ``main`` counts and reports it instead of aborting the run.
     """

     def __init__(self, message):
         # Zero-argument super() starts the lookup after NotFound, so
         # Exception.__init__ runs; super(Exception, self) named the wrong
         # class and skipped straight past Exception in the MRO.
         super().__init__(message)

 class Notes:
     """Metadata of one podcast episode: title, date, text and number."""

     # "<day> <month name> <year> <hour>:<minute>"; raw string so that \d and
     # \w reach the regex engine instead of being (invalid) string escapes.
     date_pattern = re.compile(r'(\d+) (\w+) (\d+) (\d+):(\d+)')
     # Russian month names as written in dates; list position + 1 is the
     # calendar month number.
     months = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря']

     def __init__(self):
         self.title = ''
         self.date = ''  # replaced by a datetime once date_from_str() succeeds
         self.text = ''
         self.number = 0

     def date_from_str(self, strdate):
         """Parse *strdate* and store the result in ``self.date``.

         The string must start with ``<day> <month name> <year> <hh>:<mm>``;
         the month name is matched case-insensitively against ``months``.

         Raises:
             ValueError: if the string does not match the pattern, the month
                 name is unknown, or the numbers do not form a valid date.
         """
         m = type(self).date_pattern.match(strdate)
         if m is None:
             raise ValueError('unrecognized date: ' + repr(strdate))
         day, month_str, year, hour, minute = m.groups()
         try:
             month = type(self).months.index(month_str.lower()) + 1
         except ValueError:
             raise ValueError('unknown month name: ' + repr(month_str)) from None
         self.date = datetime(int(year), month, int(day), int(hour), int(minute))

     def __repr__(self):
         return '{0} [{1}]: {2}'.format(self.title, self.date, self.text)

 def get_number(title, mp3_path):
     """Return the episode number for a podcast.

     Sources are tried in this order:

     1. a number at the start of the title, optionally preceded by one of
        the known show-name prefixes and by ``#`` or ``№``;
     2. the phrase 'сорок четыре' anywhere in the title (episode 44);
     3. a table of titles whose number cannot be derived from the text;
     4. ``ypp<digits>`` anywhere in *mp3_path* (skipped when it is None).

     Runs of whitespace in the title are collapsed before matching, so a
     doubled space does not prevent a title from being recognised.

     Raises:
         Exception: if none of the sources yields a number.
     """
     title = title.strip()
     normalized = ' '.join(title.split())

     # This pattern also covers 'Пьянка #<n>', so no separate check is needed.
     match = re.match(r'(Встреча|ЯПП|YPP|Выпуск|Пьянка|Подкаст|Шоу|Budam|Встерча|Янки после пьянки|ЯПП - После РТ|Будам)?\s*[#№]?([0-9]+)', normalized, flags=re.I)
     if match:
         return int(match.group(2))
     if 'сорок четыре' in normalized:
         return 44

     known_titles = {
         'Представляюсь и ругаюсь': 1,
         'ЯПП и Будам - Зачем нужны мужчины': 270,
         'ЯПП и Будам - О музыке, птичках и собачках': 525,
         'Ученые записки': 384,
         'ЯПП и Будам - Что такое цивилизованная страна': 534,
         'Записал подкаст Будам, то да се мы обсуждам': -1,  # the file is missing anyway
         'Ученые записки № 2. О семье и браке': 387,
     }
     if normalized in known_titles:
         return known_titles[normalized]

     # read() may pass mp3_path=None; re.search would raise TypeError on it.
     if mp3_path is not None:
         match = re.search(r'ypp([0-9]+)', mp3_path, re.I)
         if match:
             return int(match.group(1))
     raise Exception("couldn't get number from " + title)

 class Mp3:
     """Small wrapper around the EasyID3 tag set of one mp3 file."""

     def __init__(self, filepath):
         """Load the tags of *filepath*.

         When EasyID3 raises ID3NoHeaderError, an empty EasyID3 tag set is
         used instead.
         """
         self.filepath = filepath
         try:
             tags = EasyID3(filepath)
         except ID3NoHeaderError:
             tags = EasyID3()
         self.mp3 = tags

     def get(self, tag):
         """Return the value stored under *tag*, or None if the lookup raises KeyError."""
         try:
             value = self.mp3[tag]
         except KeyError:
             value = None
         return value

     def set(self, tag, value):
         """Store *value* under *tag* in the in-memory tag set."""
         self.mp3[tag] = value

     def save(self):
         """Write the tags to ``self.filepath`` and return the result of ``save``."""
         # NOTE(review): per mutagen's ID3.save, v1=2 should always write an
         # ID3v1 tag and v2_version=3 should save as ID3v2.3 — confirm against
         # the mutagen version in use.
         return self.mp3.save(filename=self.filepath, v1=2, v2_version=3)

 def read(filepath, mp3_path=None):
     """Parse a podcast notes file into a ``Notes`` object.

     The file is decoded as cp1251. Line 1 is the title, line 2 the date
     (see ``Notes.date_from_str``), line 3 is skipped, and everything from
     line 4 onwards is the episode text.

     Args:
         filepath: path of the notes file.
         mp3_path: path of the episode's mp3, passed to ``get_number`` as a
             fallback source for the episode number.

     Returns:
         The populated ``Notes`` instance.
     """
     # Context manager so the handle is closed even if decoding fails.
     with open(filepath, encoding='cp1251') as notes_file:
         data = notes_file.readlines()
     notes = Notes()
     notes.title = data[0].strip()
     notes.date_from_str(data[1].strip())
     notes.text = ''.join(data[3:]).strip()
     notes.number = get_number(notes.title, mp3_path)
     return notes

 def get_file_name(podcast_path):
     """Return the resolved path of an mp3 located directly in *podcast_path*.

     Entries are examined in sorted order, so the result is deterministic
     when the folder holds several ``.mp3`` files (``os.listdir`` alone
     returns entries in arbitrary order).

     Raises:
         NotFound: if no entry name ends with ``.mp3``.
     """
     for filename in sorted(os.listdir(podcast_path)):
         if filename.endswith('.mp3'):
             return path.realpath(path.join(podcast_path, filename))
     raise NotFound('mp3 file not found in ' + podcast_path)
  
 def pprint(mp3):
     """Print a fixed selection of tags of *mp3*, one ``tag: value`` line each.

     *mp3* only needs a ``get(tag)`` method; whatever it returns is printed.
     """
     shown_tags = ('title', 'artist', 'date', 'description', 'releasedate')
     for name in shown_tags:
         value = mp3.get(name)
         print(f'{name}: {value}')

 def folder(podcast_path):
     """Tag the mp3 of one podcast folder and move it next to that folder.

     The mp3 is located with ``get_file_name`` and the metadata is read
     from ``text.txt`` in the same folder. Title, artist, dates,
     description and track number are written to the file's tags, then the
     file is moved to the parent directory of *podcast_path* under the name
     ``<YYYY-MM-DD> - <title>.mp3``.

     The 'description', 'title3' and 'releasedate' keys must already be
     registered with EasyID3 (``main`` does this before calling).

     NOTE(review): the title is used verbatim in the new file name;
     characters that are invalid in file names are not sanitized here.
     """
     print('process ' + podcast_path)
     mp3_path = get_file_name(podcast_path)
     notes = read(path.join(podcast_path, 'text.txt'), mp3_path)
     timestamp = notes.date.strftime('%Y-%m-%d %H:%M:%S')

     mp3 = Mp3(mp3_path)
     mp3.set('title', notes.title)
     mp3.set('artist', 'Янки после пьянки')
     mp3.set('releasedate', timestamp)
     mp3.set('date', timestamp)
     mp3.set('description', notes.text)
     mp3.set('title3', notes.text)
     mp3.set('tracknumber', str(notes.number))
     mp3.save()
     print(mp3_path)

     new_name = notes.date.strftime('%Y-%m-%d') + ' - ' + notes.title + '.mp3'
     new_path = os.path.join(os.path.dirname(podcast_path), new_name)
     # Skip the move only when the file already sits at its destination.
     # The old check compared the mp3 path with the folder path, which can
     # never be equal. mp3_path is already resolved by get_file_name.
     if path.realpath(new_path) != mp3_path:
         shutil.move(mp3_path, new_path)
         print('moved to ' + new_path)


 def main(root_dir='.'):
     """Tag and rename the podcast in every sub-folder of *root_dir*.

     Sub-folders are handled in sorted order. A folder whose name ends with
     ``_1`` is deleted as a duplicate when the folder without that suffix
     also exists. Every other folder is passed to ``folder``. A ``NotFound``
     error is counted and reported; any other exception aborts the run after
     the offending folder has been printed. A summary is printed at the end.
     """
     print('processing ' + root_dir)
     # Extra EasyID3 keys used by folder().
     EasyID3.RegisterTextKey('description', 'TDES')
     EasyID3.RegisterTextKey('title3', 'TIT3')
     EasyID3.RegisterTextKey('releasedate', 'TDRL')

     errors = 0
     deleted = 0
     processed = 0
     for name in sorted(os.listdir(root_dir)):
         dirpath = os.path.join(root_dir, name)
         if not path.isdir(dirpath):
             continue
         if dirpath.endswith('_1') and path.isdir(dirpath[:-2]):
             shutil.rmtree(dirpath)
             print('deleted ' + dirpath)
             deleted += 1
             continue
         try:
             # dirpath already includes root_dir; joining it a second time
             # produced root_dir/root_dir/<name> for relative roots.
             folder(dirpath)
             processed += 1
         except NotFound as e:
             errors += 1
             print('####### ' + dirpath + ': ' + str(type(e)) + " " + str(e))
         except Exception:
             print(dirpath)
             raise  # bare raise keeps the original traceback
     if errors > 0:
         print(f'got {errors} errors')
     if deleted > 0:
         # Report the deleted counter (the errors counter was printed here).
         print(f'deleted {deleted} duplicates')
     if processed > 0:
         print(f'processed {processed} files')
     print('done')

 def bitrate(root_dir='.'):
     """Print ``<file name>   <n> kbps`` for every mp3 that ``process`` visits under *root_dir*."""
     def report(filename):
         # info.bitrate is divided by 1000 and truncated to a whole number.
         kbps = int(MP3(filename).info.bitrate / 1000)
         name = path.basename(filename)
         print(f'{name}   {kbps} kbps')

     process(report, root_dir)

 def length(root_dir='.'):
     """Print the total playing time of all mp3s under *root_dir* as ``H:MM:SS``.

     The per-file durations are summed through ``process``; hours are not
     wrapped at 24.
     """
     def action(filename, context):
         # NOTE(review): info.length is presumably the duration in seconds
         # as a float — confirm against the mutagen documentation.
         return context + MP3(filename).info.length

     total = process(action, root_dir, context=0)
     # Integer arithmetic only: true division used to yield float hours and
     # minutes, printing values such as '1:23.5:30'.
     hours, remainder = divmod(int(total), 3600)
     minutes, seconds = divmod(remainder, 60)
     print('{}:{:02d}:{:02d}'.format(hours, minutes, seconds))

 def clean_filename(root_dir='.'):
     """Report mp3 file names under *root_dir* that contain unexpected characters.

     Nothing is renamed; findings are only printed. Each name is checked
     against an explicit list of characters, and then against a whitelist
     regex that reports the first run of characters outside it.
     """
     def check(filename):
         name = path.basename(filename)
         for ch in r'\/:*?"<>|–':
             if ch in name:
                 print(f'{filename} contains "{ch}"')
         unexpected = re.search(r'[^,()№ё!A-Za-z\d#.А-Яа-я- ]+', name)
         if unexpected:
             print(f'{filename} contains something "{unexpected.group()}"')

     process(check, root_dir)


 def process(action, root_dir, context=None):
     """Apply *action* to every ``.mp3`` file under *root_dir*, recursively.

     Files directly inside *root_dir* are visited first, in sorted order;
     then each sub-directory is processed the same way, in sorted order.

     Args:
         action: called as ``action(filename)`` when *context* is None,
             otherwise as ``action(filename, context)``; in the latter case
             its return value becomes the context for the next call.
         root_dir: directory to walk.
         context: optional accumulator threaded through the calls.

     Returns:
         The final context (None when no context was supplied).
     """
     dirs = []
     for name in sorted(os.listdir(root_dir)):
         entry = os.path.join(root_dir, name)
         if path.isdir(entry):
             dirs.append(entry)
         elif path.isfile(entry) and entry.endswith('.mp3'):
             if context is not None:
                 context = action(entry, context)
             else:
                 action(entry)

     for d in dirs:
         # Descend into the sub-directory. The old loop re-ran action on the
         # last listed entry of root_dir and never looked inside d.
         context = process(action, d, context)

     return context

 if __name__ == "__main__":
     # NOTE(review): the tagging/renaming pass is commented out below; only
     # the two reports that print their findings are run.
     # main()
     bitrate()
     clean_filename()
	#coding: utf
	from mutagen.mp3 import MP3
	from mutagen.easyid3 import EasyID3
	from mutagen.id3 import ID3, TIT2, TIT3, TDES, TDRL, TimeStampTextFrame, ID3TimeStamp
	from mutagen.id3._util import ID3NoHeaderError
	from os import path
	from datetime import datetime
	import os
	import re
	import shutil
	from datetime import timedelta


	class NotFound(Exception):
	    """Raised when a required file cannot be located.

	    ``get_file_name`` raises it when a podcast folder holds no ``.mp3``
	    file; ``main`` counts and reports it instead of aborting the run.
	    """

	    def __init__(self, message):
	        # Zero-argument super() starts the lookup after NotFound, so
	        # Exception.__init__ runs; super(Exception, self) named the wrong
	        # class and skipped straight past Exception in the MRO.
	        super().__init__(message)

	class Notes:
	    """Metadata of one podcast episode: title, date, text and number."""

	    # "<day> <month name> <year> <hour>:<minute>"; raw string so that \d and
	    # \w reach the regex engine instead of being (invalid) string escapes.
	    date_pattern = re.compile(r'(\d+) (\w+) (\d+) (\d+):(\d+)')
	    # Russian month names as written in dates; list position + 1 is the
	    # calendar month number.
	    months = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря']

	    def __init__(self):
	        self.title = ''
	        self.date = ''  # replaced by a datetime once date_from_str() succeeds
	        self.text = ''
	        self.number = 0

	    def date_from_str(self, strdate):
	        """Parse *strdate* and store the result in ``self.date``.

	        The string must start with ``<day> <month name> <year> <hh>:<mm>``;
	        the month name is matched case-insensitively against ``months``.

	        Raises:
	            ValueError: if the string does not match the pattern, the month
	                name is unknown, or the numbers do not form a valid date.
	        """
	        m = type(self).date_pattern.match(strdate)
	        if m is None:
	            raise ValueError('unrecognized date: ' + repr(strdate))
	        day, month_str, year, hour, minute = m.groups()
	        try:
	            month = type(self).months.index(month_str.lower()) + 1
	        except ValueError:
	            raise ValueError('unknown month name: ' + repr(month_str)) from None
	        self.date = datetime(int(year), month, int(day), int(hour), int(minute))

	    def __repr__(self):
	        return '{0} [{1}]: {2}'.format(self.title, self.date, self.text)

	def get_number(title, mp3_path):
	    """Return the episode number for a podcast.

	    Sources are tried in this order:

	    1. a number at the start of the title, optionally preceded by one of
	       the known show-name prefixes and by ``#`` or ``№``;
	    2. the phrase 'сорок четыре' anywhere in the title (episode 44);
	    3. a table of titles whose number cannot be derived from the text;
	    4. ``ypp<digits>`` anywhere in *mp3_path* (skipped when it is None).

	    Runs of whitespace in the title are collapsed before matching, so a
	    doubled space does not prevent a title from being recognised.

	    Raises:
	        Exception: if none of the sources yields a number.
	    """
	    title = title.strip()
	    normalized = ' '.join(title.split())

	    # Plain '|' alternation: with the pipes escaped as '\|' the whole
	    # prefix list was a single literal string and no prefix ever matched.
	    # This pattern also covers 'Пьянка #<n>', so no separate check is needed.
	    match = re.match(r'(Встреча|ЯПП|YPP|Выпуск|Пьянка|Подкаст|Шоу|Budam|Встерча|Янки после пьянки|ЯПП - После РТ|Будам)?\s*[#№]?([0-9]+)', normalized, flags=re.I)
	    if match:
	        return int(match.group(2))
	    if 'сорок четыре' in normalized:
	        return 44

	    known_titles = {
	        'Представляюсь и ругаюсь': 1,
	        'ЯПП и Будам - Зачем нужны мужчины': 270,
	        'ЯПП и Будам - О музыке, птичках и собачках': 525,
	        'Ученые записки': 384,
	        'ЯПП и Будам - Что такое цивилизованная страна': 534,
	        'Записал подкаст Будам, то да се мы обсуждам': -1,  # the file is missing anyway
	        'Ученые записки № 2. О семье и браке': 387,
	    }
	    if normalized in known_titles:
	        return known_titles[normalized]

	    # read() may pass mp3_path=None; re.search would raise TypeError on it.
	    if mp3_path is not None:
	        match = re.search(r'ypp([0-9]+)', mp3_path, re.I)
	        if match:
	            return int(match.group(1))
	    raise Exception("couldn't get number from " + title)

	class Mp3:
	    """Small wrapper around the EasyID3 tag set of one mp3 file."""

	    def __init__(self, filepath):
	        """Load the tags of *filepath*.

	        When EasyID3 raises ID3NoHeaderError, an empty EasyID3 tag set is
	        used instead.
	        """
	        self.filepath = filepath
	        try:
	            tags = EasyID3(filepath)
	        except ID3NoHeaderError:
	            tags = EasyID3()
	        self.mp3 = tags

	    def get(self, tag):
	        """Return the value stored under *tag*, or None if the lookup raises KeyError."""
	        try:
	            value = self.mp3[tag]
	        except KeyError:
	            value = None
	        return value

	    def set(self, tag, value):
	        """Store *value* under *tag* in the in-memory tag set."""
	        self.mp3[tag] = value

	    def save(self):
	        """Write the tags to ``self.filepath`` and return the result of ``save``."""
	        # NOTE(review): per mutagen's ID3.save, v1=2 should always write an
	        # ID3v1 tag and v2_version=3 should save as ID3v2.3 — confirm against
	        # the mutagen version in use.
	        return self.mp3.save(filename=self.filepath, v1=2, v2_version=3)

	def read(filepath, mp3_path=None):
	    """Parse a podcast notes file into a ``Notes`` object.

	    The file is decoded as cp1251. Line 1 is the title, line 2 the date
	    (see ``Notes.date_from_str``), line 3 is skipped, and everything from
	    line 4 onwards is the episode text.

	    Args:
	        filepath: path of the notes file.
	        mp3_path: path of the episode's mp3, passed to ``get_number`` as a
	            fallback source for the episode number.

	    Returns:
	        The populated ``Notes`` instance.
	    """
	    # Context manager so the handle is closed even if decoding fails.
	    with open(filepath, encoding='cp1251') as notes_file:
	        data = notes_file.readlines()
	    notes = Notes()
	    notes.title = data[0].strip()
	    notes.date_from_str(data[1].strip())
	    notes.text = ''.join(data[3:]).strip()
	    notes.number = get_number(notes.title, mp3_path)
	    return notes

	def get_file_name(podcast_path):
	    """Return the resolved path of an mp3 located directly in *podcast_path*.

	    Entries are examined in sorted order, so the result is deterministic
	    when the folder holds several ``.mp3`` files (``os.listdir`` alone
	    returns entries in arbitrary order).

	    Raises:
	        NotFound: if no entry name ends with ``.mp3``.
	    """
	    for filename in sorted(os.listdir(podcast_path)):
	        if filename.endswith('.mp3'):
	            return path.realpath(path.join(podcast_path, filename))
	    raise NotFound('mp3 file not found in ' + podcast_path)

	def pprint(mp3):
	    """Print a fixed selection of tags of *mp3*, one ``tag: value`` line each.

	    *mp3* only needs a ``get(tag)`` method; whatever it returns is printed.
	    """
	    shown_tags = ('title', 'artist', 'date', 'description', 'releasedate')
	    for name in shown_tags:
	        value = mp3.get(name)
	        print(f'{name}: {value}')

	def folder(podcast_path):
	    """Tag the mp3 of one podcast folder and move it next to that folder.

	    The mp3 is located with ``get_file_name`` and the metadata is read
	    from ``text.txt`` in the same folder. Title, artist, dates,
	    description and track number are written to the file's tags, then the
	    file is moved to the parent directory of *podcast_path* under the name
	    ``<YYYY-MM-DD> - <title>.mp3``.

	    The 'description', 'title3' and 'releasedate' keys must already be
	    registered with EasyID3 (``main`` does this before calling).

	    NOTE(review): the title is used verbatim in the new file name;
	    characters that are invalid in file names are not sanitized here.
	    """
	    print('process ' + podcast_path)
	    mp3_path = get_file_name(podcast_path)
	    notes = read(path.join(podcast_path, 'text.txt'), mp3_path)
	    timestamp = notes.date.strftime('%Y-%m-%d %H:%M:%S')

	    mp3 = Mp3(mp3_path)
	    mp3.set('title', notes.title)
	    mp3.set('artist', 'Янки после пьянки')
	    mp3.set('releasedate', timestamp)
	    mp3.set('date', timestamp)
	    mp3.set('description', notes.text)
	    mp3.set('title3', notes.text)
	    mp3.set('tracknumber', str(notes.number))
	    mp3.save()
	    print(mp3_path)

	    new_name = notes.date.strftime('%Y-%m-%d') + ' - ' + notes.title + '.mp3'
	    new_path = os.path.join(os.path.dirname(podcast_path), new_name)
	    # Skip the move only when the file already sits at its destination.
	    # The old check compared the mp3 path with the folder path, which can
	    # never be equal. mp3_path is already resolved by get_file_name.
	    if path.realpath(new_path) != mp3_path:
	        shutil.move(mp3_path, new_path)
	        print('moved to ' + new_path)


	def main(root_dir='.'):
	    """Tag and rename the podcast in every sub-folder of *root_dir*.

	    Sub-folders are handled in sorted order. A folder whose name ends with
	    ``_1`` is deleted as a duplicate when the folder without that suffix
	    also exists. Every other folder is passed to ``folder``. A ``NotFound``
	    error is counted and reported; any other exception aborts the run after
	    the offending folder has been printed. A summary is printed at the end.
	    """
	    print('processing ' + root_dir)
	    # Extra EasyID3 keys used by folder().
	    EasyID3.RegisterTextKey('description', 'TDES')
	    EasyID3.RegisterTextKey('title3', 'TIT3')
	    EasyID3.RegisterTextKey('releasedate', 'TDRL')

	    errors = 0
	    deleted = 0
	    processed = 0
	    for name in sorted(os.listdir(root_dir)):
	        dirpath = os.path.join(root_dir, name)
	        if not path.isdir(dirpath):
	            continue
	        if dirpath.endswith('_1') and path.isdir(dirpath[:-2]):
	            shutil.rmtree(dirpath)
	            print('deleted ' + dirpath)
	            deleted += 1
	            continue
	        try:
	            # dirpath already includes root_dir; joining it a second time
	            # produced root_dir/root_dir/<name> for relative roots.
	            folder(dirpath)
	            processed += 1
	        except NotFound as e:
	            errors += 1
	            print('####### ' + dirpath + ': ' + str(type(e)) + " " + str(e))
	        except Exception:
	            print(dirpath)
	            raise  # bare raise keeps the original traceback
	    if errors > 0:
	        print(f'got {errors} errors')
	    if deleted > 0:
	        # Report the deleted counter (the errors counter was printed here).
	        print(f'deleted {deleted} duplicates')
	    if processed > 0:
	        print(f'processed {processed} files')
	    print('done')

	def bitrate(root_dir='.'):
	    """Print ``<file name> <n> kbps`` for every mp3 that ``process`` visits under *root_dir*."""
	    def report(filename):
	        # info.bitrate is divided by 1000 and truncated to a whole number.
	        kbps = int(MP3(filename).info.bitrate / 1000)
	        name = path.basename(filename)
	        print(f'{name} {kbps} kbps')

	    process(report, root_dir)

	def length(root_dir='.'):
	    """Print the total playing time of all mp3s under *root_dir* as ``H:MM:SS``.

	    The per-file durations are summed through ``process``; hours are not
	    wrapped at 24.
	    """
	    def action(filename, context):
	        # NOTE(review): info.length is presumably the duration in seconds
	        # as a float — confirm against the mutagen documentation.
	        return context + MP3(filename).info.length

	    total = process(action, root_dir, context=0)
	    # Integer arithmetic only: true division used to yield float hours and
	    # minutes, printing values such as '1:23.5:30'.
	    hours, remainder = divmod(int(total), 3600)
	    minutes, seconds = divmod(remainder, 60)
	    print('{}:{:02d}:{:02d}'.format(hours, minutes, seconds))

	def clean_filename(root_dir='.'):
	    """Report mp3 file names under *root_dir* that contain unexpected characters.

	    Nothing is renamed; findings are only printed. Each name is checked
	    against an explicit list of characters, and then against a whitelist
	    regex that reports the first run of characters outside it.
	    """
	    def action(filename):
	        name = path.basename(filename)
	        # Each character is listed once. The stray backslash in front of
	        # '|' put '\' into the list twice, so a backslash in a name was
	        # reported twice.
	        chars = r'\/:*?"<>|–'
	        for ch in chars:
	            if ch in name:
	                print(f'{filename} contains "{ch}"')
	        match = re.search(r'[^,()№ё!A-Za-z\d#.А-Яа-я- ]+', name)
	        if match:
	            print(f'{filename} contains something "{match.group()}"')

	    process(action, root_dir)


	def process(action, root_dir, context=None):
	    """Apply *action* to every ``.mp3`` file under *root_dir*, recursively.

	    Files directly inside *root_dir* are visited first, in sorted order;
	    then each sub-directory is processed the same way, in sorted order.

	    Args:
	        action: called as ``action(filename)`` when *context* is None,
	            otherwise as ``action(filename, context)``; in the latter case
	            its return value becomes the context for the next call.
	        root_dir: directory to walk.
	        context: optional accumulator threaded through the calls.

	    Returns:
	        The final context (None when no context was supplied).
	    """
	    dirs = []
	    for name in sorted(os.listdir(root_dir)):
	        entry = os.path.join(root_dir, name)
	        if path.isdir(entry):
	            dirs.append(entry)
	        elif path.isfile(entry) and entry.endswith('.mp3'):
	            if context is not None:
	                context = action(entry, context)
	            else:
	                action(entry)

	    for d in dirs:
	        # Descend into the sub-directory. The old loop re-ran action on the
	        # last listed entry of root_dir and never looked inside d.
	        context = process(action, d, context)

	    return context

	if __name__ == "__main__":
	    # NOTE(review): the tagging/renaming pass is commented out below; only
	    # the two reports that print their findings are run.
	    # main()
	    bitrate()
	    clean_filename()