luckmoon

Download CMake from: https://cmake.org/download/

wget https://cmake.org/files/v3.12/cmake-3.12.3.tar.gz

Compile from source and install

tar zxvf cmake-3.*

深度学习用于语音合成的研究综述

本文综述近年来深度学习用于语音合成的一些方法。

WaveNet

当自回归生成模型在图像和文本领域得到广泛应用时，WaveNet [4] 尝试将这些思想应用于语音领域。仿照 PixelRNN (van den Oord et al., 2016) 生成图像的做法，WaveNet 依据之前的采样点来生成下一个采样点，生成下一个采样点的模型为 CNN 结构。为了生成指定说话人的声音以及指定文本的声音，WaveNet 引入了全局条件和局部条件来控制合成内容。为了扩大感受野，WaveNet 采用带洞卷积（dilated convolution），使 filter 的感受野按指数扩张。

WaveNet 存在的问题是：1）每次只预测一个采样点，速度太慢；2）如果用于 TTS，初始采样点的选择将会很重要；3）需要文本前端的支持，前端分析一旦出错，将直接影响合成效果。

	#!/usr/bin/env python
	# -- coding:UTF-8 --

	import torch
	import torch.nn as nn
	import torch.nn.init as init


	def weight_init(m):
	'''

	################################################################################
	# Method 1: Install using rpm packages (credit to DarkMukke)
	#

	rpm -Uvh http://mirror.ghettoforge.org/distributions/gf/gf-release-latest.gf.el7.noarch.rpm
	rpm --import http://mirror.ghettoforge.org/distributions/gf/RPM-GPG-KEY-gf.el7

	# WARNING: removing vim-minimal uninstalls `sudo` if you skip the second step
	# make sure to at least run `yum install sudo`
	yum -y remove vim-minimal vim-common vim-enhanced

	__author__ = 'Christoph Heindl'
	__copyright__ = 'Copyright 2017'
	__license__ = 'BSD'

	"""Trains a HMM based on gradient descent optimization.

	The parameters (theta) of the model are transition and
	emission probabilities, as well as the initial state probabilities.

	Given a start solution, the negative log likelihood of data given the

	import numpy as np
	import tensorflow as tf
	from tensorflow.python.layers import core as layers_core

	hparams = tf.contrib.training.HParams(
	batch_size=3,
	encoder_length=4,
	decoder_length=5,
	num_units=6,
	src_vocab_size=7,

	"""Script to illustrate usage of tf.estimator.Estimator in TF v1.3"""
	import tensorflow as tf

	from tensorflow.examples.tutorials.mnist import input_data as mnist_data
	from tensorflow.contrib import slim
	from tensorflow.contrib.learn import ModeKeys
	from tensorflow.contrib.learn import learn_runner


	# Show debugging output

	"""
	Author: Awni Hannun

	This is an example CTC decoder written in Python. The code is
	intended to be a simple example and is not designed to be
	especially efficient.

	The algorithm is a prefix beam search for a model trained
	with the CTC loss function.

	def _sequence_mask(sequence_length, max_len=None):
	if max_len is None:
	max_len = sequence_length.data.max()
	batch_size = sequence_length.size(0)
	seq_range = torch.range(0, max_len - 1).long()
	seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len)
	seq_range_expand = Variable(seq_range_expand)
	if sequence_length.is_cuda:
	seq_range_expand = seq_range_expand.cuda()
	seq_length_expand = (sequence_length.unsqueeze(1)

	# Build libtorrent-rasterbar 1.0.10 and its Python binding from source.

	# Build prerequisites: Boost plus the compiler toolchain and Python headers.
	yum install -y boost boost-devel
	yum install -y make gcc gcc-c++ kernel-devel python-devel

	# Fetch and unpack the release tarball.
	wget https://github.com/arvidn/libtorrent/releases/download/libtorrent-1_0_10/libtorrent-rasterbar-1.0.10.tar.gz
	tar zxvf libtorrent-rasterbar-1.0.10.tar.gz
	# Enter the extracted source directory (not the .tar.gz archive itself).
	cd libtorrent-rasterbar-1.0.10

	# Configure with the Python binding enabled, then build and install.
	./configure --disable-debug --with-boost-libdir=/usr/lib64 --disable-encryption --enable-python-binding
	make && make install

	# Make the freshly installed shared library visible to the dynamic loader.
	export LD_LIBRARY_PATH=/usr/local/lib/

	# Build the Python binding against the installed library.
	cd bindings/python
	python setup.py build