Params 209,190,912. Fraction Embedding: 19%
Params 265,814,016. Fraction Embedding: 15%
Params 354,418,688. Fraction Embedding: 15%
Params 455,081,984. Fraction Embedding: 12%
Params 1,312,817,152. Fraction Embedding: 8%
Params 1,715,470,336. Fraction Embedding: 6%
Params 2,875,195,392. Fraction Embedding: 5%
Params 3,781,165,056. Fraction Embedding: 4%
Params 3,882,876,928. Fraction Embedding: 5%
Params 5,116,002,304. Fraction Embedding: 4%
Params 5,041,553,408. Fraction Embedding: 4%
Params 6,652,166,144. Fraction Embedding: 3%
Multilingual
Params 361,869,312. Fraction Embedding: 53%
Params 557,989,888. Fraction Embedding: 46%
Params 1,719,959,552. Fraction Embedding: 30%
Params 3,485,908,992. Fraction Embedding: 22%
Params 4,595,376,128. Fraction Embedding: 19%
Params 5,855,838,208. Fraction Embedding: 17%
def model_params(d, nl, en_only=True):
    """Estimate a model's parameter count and the share held by embeddings.

    The estimate is ``nl * 12 * d**2`` for the layers plus ``vocab * d`` for
    the embedding table. The vocabulary has 51,200 entries when ``en_only``
    is true and 250,000 entries otherwise.

    Args:
        d: Model width (multiplied by the vocabulary size and squared for
            the per-layer term).
        nl: Number of layers.
        en_only: Use the 51,200-entry vocabulary (True) or the
            250,000-entry vocabulary (False).

    Returns:
        A dict with ``total`` (estimated parameter count) and
        ``embed_frac`` (embedding parameters divided by ``total``).
        A one-line summary of the same numbers is also printed.

    Raises:
        ZeroDivisionError: If the estimated total is zero (e.g. ``d == 0``).
    """
    vocab = 51200 if en_only else 250000
    embed_params = vocab * d
    # 12 * d^2 per layer -- presumably the common 4*d^2 (attention) +
    # 8*d^2 (MLP) approximation with biases/norms ignored; TODO confirm.
    params_per_layer = 12 * d ** 2
    tot_params = nl * params_per_layer + embed_params
    embed_frac = embed_params / tot_params
    print(f'Params {tot_params:,}. Fraction Embedding: {embed_frac:.0%}')
    # Also hand the numbers back so callers can use them programmatically.
    return dict(total=tot_params, embed_frac=embed_frac)
#model_params(1024, 12)
#model_params(1024, 12)
# Model widths to sweep over.
sizes = [768, 1024, 2048, 3072, 3584, 4096]

# Default (en_only) vocabulary: report every width at 24 and then 32 layers.
for d in sizes:
    for n_layers in (24, 32):
        model_params(d, n_layers)

# Same widths at 24 layers with the larger (en_only=False) vocabulary.
print('Multilingual')
for d in sizes:
    model_params(d, 24, en_only=False)