This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import torch | |
import numpy as np | |
from sentence_transformers import SentenceTransformer | |
# Load pre-trained model for sentence embeddings | |
model = SentenceTransformer("paraphrase-multilingual-mpnet-base-v2") | |
# Set up LSTM model | |
input_size = 768 # Size of the sentence embeddings |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import argparse | |
from typing import Dict | |
from pathlib import Path | |
import smart_open | |
import ftfy | |
from tqdm import tqdm | |
import html2text | |
from datasets import load_dataset |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
from flair.data import Sentence | |
from flair.embeddings import ( | |
DocumentEmbeddings, | |
FlairEmbeddings, | |
DocumentLMEmbeddings, | |
DocumentPoolEmbeddings, | |
) | |
from torch import Tensor |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# You will need `apt get parallel pv` to make it run | |
# download file containing urls | |
curl http://webdatacommons.org/structureddata/2022-12/files/file.list > urls.txt | |
# create output file | |
touch output.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import bz2 | |
import logging | |
import multiprocessing | |
import re | |
from pickle import PicklingError | |
# LXML isn't faster, so let's go with the built-in solution | |
from xml.etree.ElementTree import iterparse |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install pymorphy3 | |
# pip install pymorphy3-dicts-uk | |
import pymorphy3 | |
from collections import defaultdict | |
from itertools import product | |
from typing import List, List | |
morph = pymorphy3.MorphAnalyzer(lang="uk") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os.path | |
from flair.data import Dictionary | |
from flair.models import LanguageModel | |
from flair.trainers.language_model_trainer import LanguageModelTrainer, TextCorpus | |
def train_flair_embeddings( | |
corpus_path="/data/ubertext/for_flair", | |
dictionary_path="/home/dima/Projects/flair_embeddings/flair_dictionary.pkl", | |
lm_file="./language_model_forward_no_amp_accum_grad_fixed", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import wn | |
import csv | |
from collections import Counter, defaultdict | |
from tqdm.notebook import tqdm | |
wn.download("pwn:3.1") | |
pwn = wn.Wordnet("pwn:3.1") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"lotNumberStr": "27059380", | |
"ln": 27059380, | |
"mkn": "DODGE", | |
"lm": "CHALLENGER", | |
"lcy": 2018, | |
"fv": "2C3CDZGGXJH289026", | |
"la": 24998, | |
"rc": 27890, | |
"obc": "A", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- ../venv/lib/python3.6/site-packages/flask_lambda.py 2017-07-04 02:51:23.000000000 +0300 | |
+++ lambdas/venues_finder/venues_finder/lambdify.py 2017-07-10 16:49:41.000000000 +0300 | |
@@ -31,7 +31,7 @@ | |
except ImportError: | |
from io import StringIO | |
-from werkzeug.wrappers import BaseRequest | |
+from werkzeug.wrappers import BaseRequest, Response | |
NewerOlder