Skip to content

Instantly share code, notes, and snippets.

@MeRahulAhire
Last active February 19, 2022 22:35
Show Gist options
  • Save MeRahulAhire/9032a1a7894af9576c3257f9c08aca26 to your computer and use it in GitHub Desktop.
Save MeRahulAhire/9032a1a7894af9576c3257f9c08aca26 to your computer and use it in GitHub Desktop.
Get details about Athena table and how to create it.

1st getting all the schema for main Tables

require("dotenv").config();
const AWS = require("aws-sdk");

const athena = new AWS.Athena({
  accessKeyId: process.env.ACCESS_KEY,
  secretAccessKey: process.env.SECRET_KEY,
  region: process.env.REGION,
  signatureVersion: "v4",
});

var params = {
  CatalogName: "AwsDataCatalog",
  DatabaseName: "test",
  TableName: 'test1' // assuming there's only one master table in database. Otherwise, first we need to pick table by "SHOW tables"
};

athena.getTableMetadata(params, function (err, data) {
  if (err) console.log(err, err.stack);
  else console.log(data.TableMetadata.Columns);
});

Result :

[
  { Name: 'customer', Type: 'int' },
  { Name: 'customername', Type: 'string' },
  { Name: 'address', Type: 'string' },
  { Name: 'city', Type: 'string' },
  { Name: 'postalcode', Type: 'int' }
]

To create child table out of master tables schema, select attribute and send to backend with their types as followed

{
"schema" : "customer int, address string"
}

Create child tables

require("dotenv").config();
const AWS = require("aws-sdk");
const express = require("express");
const app = express();
app.use(express.json());

const athena = new AWS.Athena({
  accessKeyId: process.env.ACCESS_KEY,
  secretAccessKey: process.env.SECRET_KEY,
  region: process.env.REGION,
  signatureVersion: "v4",
});


app.post("/createChildTable", (req, res) => {

  const tableName = req.body.tableName
  const schema = req.body.schema

  let params = {
    QueryString: `CREATE EXTERNAL TABLE IF NOT EXISTS 'master'.'${tableName}' (
      ${schema}
      )
      ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
      WITH SERDEPROPERTIES (
      'serialization.format' = ',',
      'field.delim' = ','
      ) LOCATION 's3://bucket-location'
      TBLPROPERTIES ('has_encrypted_data'='false');
      `,

    QueryExecutionContext: {
      Database: "test",
    },
    ResultConfiguration: {
      OutputLocation: "s3://result-location/",
    },
  };
  athena.startQueryExecution(params, function (err, data){
    if (err) console.log(err, err.stack);
    // an error occurred
    else console.log(data); // successful response
    let queryId = data.QueryExecutionId
    setTimeout(() => {
      var params2 = {
        QueryExecutionId: queryId /* required */,
      };
      athena.getQueryResults(params2, function (err, result) {
        if (err) console.log(err, err.stack);
        
        else console.log(result);
        res.send(finalRes); 
      });
    }, 3000);
  });
});

app.listen(process.env.PORT || 3000, () => {
  console.log("Server running on port 3000");
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment