This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import AutoTokenizer, AutoModel | |
import torch | |
# MODEL CKPT is downloaded from: "jinaai/jina-embeddings-v2-base-en" # has context len of 8192 | |
MODEL_CKPT = "/Users/rohan/3_Resources/ai_models/jina-embeddings-v2-base-en" | |
def recursive_splitter(text: str, separators: list[str], chunk_size: int) -> list[str]: | |
if len(separators) == 0: | |
words = text.strip().split(' ') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import dataclasses | |
import openai | |
import os | |
import re | |
import yaml | |
from pydantic import BaseModel, ValidationError | |
from typing import Optional |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--[[ | |
===================================================================== | |
==================== READ THIS BEFORE CONTINUING ==================== | |
===================================================================== | |
======== .-----. ======== | |
======== .----------------------. | === | ======== | |
======== |.-""""""""""""""""""-.| |-----| ======== | |
======== || || | === | ======== | |
======== || KICKSTART.NVIM || |-----| ======== |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Inspiration from Eric Hartford github2file: https://github.com/cognitivecomputations/github2file | |
""" | |
A utility script to download and process files from a GitHub repository based on language-specific criteria. | |
Features: | |
- Download files from a specific branch or tag of a GitHub repository. | |
- Filter files based on their extension to include only those relevant to the specified programming language. | |
- Exclude files located in certain directories or those that match a set of predefined non-useful criteria. | |
- Check and exclude test files based on content indicators specific to each supported language. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function sleep(ms) { | |
return new Promise((resolve) => setTimeout(resolve, ms)); | |
} | |
function clickShowTranscriptButton() { | |
// Selecting the button based on its class and aria-label | |
var buttons = document.querySelectorAll( | |
"button.yt-spec-button-shape-next.yt-spec-button-shape-next--outline.yt-spec-button-shape-next--call-to-action.yt-spec-button-shape-next--size-m" | |
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
async function extractDataAndPaginate() { | |
const results = []; | |
function extractDataFromPage() { | |
const data = []; | |
document.querySelectorAll('tbody.zp_RFed0').forEach((tbody) => { | |
const rows = tbody.querySelectorAll('tr'); | |
rows.forEach((row) => { | |
const nameLink = row.querySelector('a[href*="/contacts/"]'); | |
const linkedInLink = row.querySelector('a[href*="linkedin.com"]'); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(function() { | |
// Initialize an array to hold the conversation data | |
let conversationData = []; | |
// Select all conversation blocks | |
let conversationBlocks = document.querySelectorAll('[data-testid^="conversation-turn"]'); | |
conversationBlocks.forEach(block => { | |
// Determine the role directly from data attributes in the HTML | |
let roleElement = block.querySelector('[data-message-author-role]'); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Updated extractLinkedInPosts function with continuous scrolling | |
async function extractLinkedInPosts() { | |
const postsData = []; | |
const seenTexts = new Set(); | |
let lastLength = 0; | |
while (postsData.length < 20) { | |
window.scrollTo(0, document.body.scrollHeight); | |
await new Promise(resolve => setTimeout(resolve, 2000)); // Wait for more posts to load |