Skip to content

Instantly share code, notes, and snippets.

View JnsFerreira's full-sized avatar
🖥️
Coding the future

Jonas Ferreira JnsFerreira

🖥️
Coding the future
View GitHub Profile
@JnsFerreira
JnsFerreira / .py
Created July 18, 2023 17:29
athena_create_table_generator.py
import boto3
def get_glue_table_schema(database_name, table_name):
client = boto3.client('glue')
response = client.get_table(
DatabaseName=database_name,
Name=table_name
)
# References:
# https://aws.amazon.com/blogs/big-data/load-data-incrementally-and-optimized-parquet-writer-with-aws-glue/
import sys
from awsglue.job import Job
from awsglue.transforms import *
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.utils import getResolvedOptions
@JnsFerreira
JnsFerreira / crawler.py
Last active August 3, 2021 19:06
book_crawler
# Built-in libraries
from time import time
import threading
import logging
import requests
import math
# Project libraries
from config import functions as f
from config import selenium_functions as sf
@JnsFerreira
JnsFerreira / map_example.py
Created February 1, 2021 23:52
Using map in Apache Beam
import apache_beam as beam
with beam.Pipeline() as pipeline:
plants = (
pipeline
| 'Gardening plants' >> beam.Create([
' 🍓Strawberry \n',
' 🥕Carrot \n',
' 🍆Eggplant \n',
' 🍅Tomato \n',