Skip to content

Instantly share code, notes, and snippets.

@DerekHawkins
Created February 23, 2022 02:09
Show Gist options
  • Save DerekHawkins/6d1bea3b8807385283334b1ef2559294 to your computer and use it in GitHub Desktop.
Save DerekHawkins/6d1bea3b8807385283334b1ef2559294 to your computer and use it in GitHub Desktop.
import hubspot
from hubspot.cms.blogs.blog_posts import BlogPost, ApiException
from genson import SchemaBuilder
import dateutil.parser as parser
import requests
import json
import pandas as pd
# Download HubSpot CMS blog posts page by page and collect one pandas
# DataFrame per page in `mainframe` (callers can pd.concat() them afterwards).
url = "https://api.hubapi.com/cms/v3/blogs/posts"
mainframe = []  # one DataFrame per fetched page, in request order

# Output column name -> key read from each element of the response's
# 'results' list. Insertion order defines the DataFrame column order.
_POST_FIELDS = {
    'id': 'id',
    'title': 'htmlTitle',
    'author': 'authorName',
    'url': 'url',
    'datePublished': 'created',
    'dateModified': 'updated',
    'image': 'featuredImage',
    'description': 'metaDescription',
}
_PAGE_SIZE = 100      # max limit per call is 100, so we paginate in steps of 100
_MAX_OFFSET = 1000    # hard upper bound on the offsets requested
_TIMEOUT_SECONDS = 30  # avoid hanging forever on a stalled connection

headers = {'accept': 'application/json'}

# Start at offset 0 so the first page of posts is included (the previous
# range started at 100 and silently skipped the first 100 posts).
# The original wrapped this range in tqdm() without importing it; the purely
# cosmetic progress bar is dropped so the script runs with the file's imports.
for pag in range(0, _MAX_OFFSET, _PAGE_SIZE):
    # NOTE(review): "hubspot_api_key" is a placeholder to be replaced with a
    # real credential; HubSpot has reportedly retired `hapikey` auth in favour
    # of private-app access tokens -- confirm before relying on this.
    querystring = {"hapikey": "hubspot_api_key",
                   'offset': str(pag),
                   'limit': str(_PAGE_SIZE)}
    response = requests.request("GET", url, headers=headers,
                                params=querystring, timeout=_TIMEOUT_SECONDS)
    # Fail loudly on HTTP errors instead of hitting a KeyError on 'results'.
    response.raise_for_status()
    real_response = response.json()
    results = real_response['results']

    # Keep strict key access: a post missing an expected field raises KeyError
    # exactly as before, rather than silently producing holes in the data.
    frame = [
        {column: post[key] for column, key in _POST_FIELDS.items()}
        for post in results
    ]
    df = pd.DataFrame(frame)
    mainframe.append(df)

    # An empty page means there is nothing left to fetch; stop instead of
    # issuing further requests that would only add empty frames.
    if not results:
        break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment