Skip to content

Instantly share code, notes, and snippets.

from transformers import AutoTokenizer, AutoModel
import torch
# MODEL_CKPT was downloaded from "jinaai/jina-embeddings-v2-base-en" (context length of 8192)
MODEL_CKPT = "/Users/rohan/3_Resources/ai_models/jina-embeddings-v2-base-en"
def recursive_splitter(text: str, separators: list[str], chunk_size: int) -> list[str]:
if len(separators) == 0:
words = text.strip().split(' ')
import dataclasses
import openai
import os
import re
import yaml
from pydantic import BaseModel, ValidationError
from typing import Optional
@RohanAwhad
RohanAwhad / init.lua
Created July 29, 2024 13:10
My neovim config
--[[
=====================================================================
==================== READ THIS BEFORE CONTINUING ====================
=====================================================================
======== .-----. ========
======== .----------------------. | === | ========
======== |.-""""""""""""""""""-.| |-----| ========
======== || || | === | ========
======== || KICKSTART.NVIM || |-----| ========
@RohanAwhad
RohanAwhad / github2file.py
Last active April 17, 2024 22:22
A utility script to download and process files from a GitHub repository based on language-specific criteria.
# Inspired by Eric Hartford's github2file: https://github.com/cognitivecomputations/github2file
"""
A utility script to download and process files from a GitHub repository based on language-specific criteria.
Features:
- Download files from a specific branch or tag of a GitHub repository.
- Filter files based on their extension to include only those relevant to the specified programming language.
- Exclude files located in certain directories or those that match a set of predefined non-useful criteria.
- Check and exclude test files based on content indicators specific to each supported language.
/**
 * Return a promise that settles after a timer delay.
 *
 * @param {number} ms - Delay handed to setTimeout, in milliseconds.
 * @returns {Promise} Promise resolved (with no value) when the timer fires.
 */
function sleep(ms) {
  return new Promise(function (done) {
    // The timer callback is the promise's own resolver, so it fulfils
    // with `undefined` once the delay elapses.
    setTimeout(done, ms);
  });
}
function clickShowTranscriptButton() {
// Selecting the button based on its class and aria-label
var buttons = document.querySelectorAll(
"button.yt-spec-button-shape-next.yt-spec-button-shape-next--outline.yt-spec-button-shape-next--call-to-action.yt-spec-button-shape-next--size-m"
);
@RohanAwhad
RohanAwhad / extractAndDownloadApolloSearchResults.js
Created March 6, 2024 21:50
Extract details from the search response in Apollo.io
async function extractDataAndPaginate() {
const results = [];
function extractDataFromPage() {
const data = [];
document.querySelectorAll('tbody.zp_RFed0').forEach((tbody) => {
const rows = tbody.querySelectorAll('tr');
rows.forEach((row) => {
const nameLink = row.querySelector('a[href*="/contacts/"]');
const linkedInLink = row.querySelector('a[href*="linkedin.com"]');
(function() {
// Initialize an array to hold the conversation data
let conversationData = [];
// Select all conversation blocks
let conversationBlocks = document.querySelectorAll('[data-testid^="conversation-turn"]');
conversationBlocks.forEach(block => {
// Determine the role directly from data attributes in the HTML
let roleElement = block.querySelector('[data-message-author-role]');
// Updated extractLinkedInPosts function with continuous scrolling
async function extractLinkedInPosts() {
const postsData = [];
const seenTexts = new Set();
let lastLength = 0;
while (postsData.length < 20) {
window.scrollTo(0, document.body.scrollHeight);
await new Promise(resolve => setTimeout(resolve, 2000)); // Wait for more posts to load