Skip to content

Instantly share code, notes, and snippets.

@alanhoyle
Last active November 30, 2020 16:51
Show Gist options
  • Save alanhoyle/58f69223150bf4fdfbf1ade797cc19f0 to your computer and use it in GitHub Desktop.
Save alanhoyle/58f69223150bf4fdfbf1ade797cc19f0 to your computer and use it in GitHub Desktop.
Build VEP cache for vcf2maf
# Build a VEP cache directory for use with vcf2maf.
# Based on https://gist.github.com/ckandoth/5390e3ae4ecf182fa92f6318cfa9fa97
#
# Requires bash (for `pipefail`). Stop at the first failing command, on any
# unset variable, and when any stage of a pipeline fails, so that later steps
# never run against a half-finished install or a truncated file.
set -euo pipefail

# Ensembl/VEP release number and the directory that receives the cache and
# plugins. Both can be overridden from the environment; the defaults are the
# values this script has always used.
VEP_VER="${VEP_VER:-97}"
VEP_CACHE="${VEP_CACHE:-/opt/vep-cache}"

mkdir -p -- "${VEP_CACHE}"

# Run the VEP installer non-interactively against ${VEP_CACHE}.
# Per the Ensembl installer docs, `-a ap` selects the API and plugin steps
# only (no cache or FASTA download), and `--PLUGINS LoF` requests the LoF
# (LOFTEE) plugin.
vep_install \
  -a ap \
  --NO_HTSLIB --NO_TEST --NO_UPDATE \
  -s homo_sapiens -y GRCh38 \
  -c "${VEP_CACHE}" \
  --convert \
  --cache_version "${VEP_VER}" \
  --PLUGINS LoF

# Fetch LOFTEE's splice_module.pl (pinned to tag v0.3-beta) into the plugin
# directory; presumably the LoF plugin installed above needs it at runtime.
wget https://raw.githubusercontent.com/konradjk/loftee/v0.3-beta/splice_module.pl \
  -O "${VEP_CACHE}/Plugins/splice_module.pl"
# Download the ExAC non-TCGA sites VCF into the cache directory, rewrite it so
# that only the listed INFO tags remain (all FORMAT fields are dropped), remove
# the sites listed in vcf2maf's known_somatic_sites.bed, and index the result
# with tabix.

: "${VEP_CACHE:?VEP_CACHE must be set to the VEP cache directory}"

# Without pipefail a failure in `bcftools annotate` or `pv` is hidden by the
# exit status of the final pipeline stage, and the truncated output would then
# be moved over the freshly downloaded VCF.
set -o pipefail

EXAC_VCF="${VEP_CACHE}/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz"
EXAC_FIXED="${VEP_CACHE}/ExAC_nonTCGA.r0.3.1.sites.fixed.vcf.gz"

#######################################
# Download, filter and index the ExAC VCF.
# Globals:   VEP_CACHE, EXAC_VCF, EXAC_FIXED (read)
# Outputs:   progress from wget/curl/pv on the terminal; writes
#            header_line.tmp and known_somatic_sites.bed into ${VEP_CACHE}
# Returns:   0 on success, the status of the first failing step otherwise
# Note:      changes the working directory to ${VEP_CACHE} and leaves it there.
#######################################
prepare_exac_vcf() {
  cd "${VEP_CACHE}" &&
    # -O pins the output name: a plain wget would save to "*.1" on a re-run
    # and the steps below would silently process the stale file.
    wget -O "${EXAC_VCF}" \
      ftp://ftp.broadinstitute.org:/pub/ExAC_release/release0.3.1/subsets/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz &&
    # Header definition for the AC_Adj0_Filter FILTER value, injected via
    # --header-lines below (presumably because the ExAC header lacks it).
    printf '%s\n' '##FILTER=<ID=AC_Adj0_Filter,Description="Only low quality genotype calls containing alternate alleles are present">' \
      > header_line.tmp &&
    # -f makes curl fail on an HTTP error instead of saving the error page
    # as the BED file.
    curl -fLO https://raw.githubusercontent.com/mskcc/vcf2maf/v1.6.16/data/known_somatic_sites.bed &&
    echo "filtering and annotating the ExAC VCF" &&
    # "--remove FMT,^INF/..." drops every FORMAT field and every INFO tag
    # that is not in the list; "--targets-file ^FILE" excludes the BED regions.
    bcftools annotate \
      --header-lines header_line.tmp \
      --remove FMT,^INF/AF,INF/AC,INF/AN,INF/AC_Adj,INF/AN_Adj,INF/AC_AFR,INF/AC_AMR,INF/AC_EAS,INF/AC_FIN,INF/AC_NFE,INF/AC_OTH,INF/AC_SAS,INF/AN_AFR,INF/AN_AMR,INF/AN_EAS,INF/AN_FIN,INF/AN_NFE,INF/AN_OTH,INF/AN_SAS \
      "${EXAC_VCF}" |
      pv -f -l |
      bcftools filter \
        --targets-file '^known_somatic_sites.bed' \
        --output-type z \
        --output "${EXAC_FIXED}" &&
    # Replace the download with the filtered copy only after the whole
    # pipeline succeeded.
    mv -f -- "${EXAC_FIXED}" "${EXAC_VCF}" &&
    # -f lets a re-run overwrite an index left by a previous run.
    tabix -f -p vcf "${EXAC_VCF}"
}

exac_status=0
prepare_exac_vcf || exac_status=$?

# Clean up scratch files on both the success and the failure path.
rm -f -- "${VEP_CACHE}/header_line.tmp" "${EXAC_FIXED}"

if [ "${exac_status}" -ne 0 ]; then
  echo "ERROR: preparing the ExAC VCF failed (exit status ${exac_status})" >&2
  exit "${exac_status}"
fi
# Download the pre-built Ensembl VEP cache tarball for human GRCh38 into
# ${VEP_CACHE}, then run vep_convert_cache over that directory.

: "${VEP_VER:?VEP_VER must be set to the Ensembl/VEP release number}"
: "${VEP_CACHE:?VEP_CACHE must be set to the VEP cache directory}"

# Other caches, kept for reference (disabled):
# rsync -vh rsync://ftp.ensembl.org/ensembl/pub/release-${VEP_VER}/variation/vep/homo_sapiens_vep_${VEP_VER}_GRCh37.tar.gz ${VEP_CACHE} && \
# rsync -vh rsync://ftp.ensembl.org/ensembl/pub/release-${VEP_VER}/variation/vep/mus_musculus_vep_${VEP_VER}_GRCm38.tar.gz ${VEP_CACHE} && \
# OR, over FTP (-P sets the download directory):
# wget -P ${VEP_CACHE} ftp://ftp.ensembl.org/ensembl/pub/release-${VEP_VER}/variation/vep/homo_sapiens_vep_${VEP_VER}_GRCh37.tar.gz && \
# wget -P ${VEP_CACHE} ftp://ftp.ensembl.org/ensembl/pub/release-${VEP_VER}/variation/vep/mus_musculus_vep_${VEP_VER}_GRCm38.tar.gz

# Do not go on to the conversion step if the download failed or was cut short.
rsync -vh --progress \
  "rsync://ftp.ensembl.org/ensembl/pub/release-${VEP_VER}/variation/vep/homo_sapiens_vep_${VEP_VER}_GRCh38.tar.gz" \
  "${VEP_CACHE}" || {
  echo "ERROR: downloading the VEP ${VEP_VER} GRCh38 cache failed" >&2
  exit 1
}

# NOTE(review): the unpack step below is disabled, so as written the tarball
# fetched above is never extracted before vep_convert_cache runs. Confirm
# whether extraction happens elsewhere; otherwise re-enable these lines.
# echo "expanding VEP cache" && \
# bash -c "cat ${VEP_CACHE}/*_vep_${VEP_VER}_GRC*.tar.gz | pv -f | tar -izxf - -C ${VEP_CACHE} " && \
# echo "... Removing source files ..." && \
# rm -v ${VEP_CACHE}/*_vep_${VEP_VER}_GRC*.tar.gz && \

echo "... converting VEP cache to compressed/index ..." &&
  vep_convert_cache --species all --version all --dir "${VEP_CACHE}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment