DerekHawkins · February 23, 2022 02:09
diff --git a/hubspot_article_schema_1.py b/hubspot_article_schema_1.py
 import hubspot
 from hubspot.cms.blogs.blog_posts import BlogPost, ApiException

 from genson import SchemaBuilder
 import dateutil.parser as parser

 import requests
 import json
 import pandas as pd

 url = "https://api.hubapi.com/cms/v3/blogs/posts"

 # Maps each output column name to the key it is read from in a post object
 # of the API response.
 post_fields = {
     'id': 'id',
     'title': 'htmlTitle',
     'author': 'authorName',
     'url': 'url',
     'datePublished': 'created',
     'dateModified': 'updated',
     'image': 'featuredImage',
     'description': 'metaDescription',
 }

 # Request headers do not change between pages, so they are built once.
 headers = {'accept': 'application/json'}

 # Collects one DataFrame per fetched page.
 mainframe = []

 # NOTE(review): `tqdm` is not among the imports visible at the top of this
 # script -- confirm it is imported before this loop runs.
 # NOTE(review): paging starts at offset 100, so offset 0 is never requested;
 # confirm that skipping the first page is intentional.
 for pag in tqdm(range(100, 1000, 100)):  # max limit per call is 100, so we page in steps of 100.
     querystring = {"hapikey": "hubspot_api_key",  # presumably a placeholder for the real key
                    'offset': str(pag),
                    'limit': '100'}

     # A timeout keeps a stalled connection from hanging the whole script.
     response = requests.get(url, headers=headers, params=querystring, timeout=30)
     # Fail on an HTTP error status here rather than with a KeyError on
     # 'results' a few lines below.
     response.raise_for_status()
     real_response = response.json()

     # One flat record per post, limited to the fields listed in post_fields.
     frame = [{column: post[key] for column, key in post_fields.items()}
              for post in real_response['results']]

     df = pd.DataFrame(frame)
     mainframe.append(df)
	import hubspot
	from hubspot.cms.blogs.blog_posts import BlogPost, ApiException

	from genson import SchemaBuilder
	import dateutil.parser as parser

	import requests
	import json
	import pandas as pd

	url = "https://api.hubapi.com/cms/v3/blogs/posts"

	# Maps each output column name to the key it is read from in a post object
	# of the API response.
	post_fields = {
	    'id': 'id',
	    'title': 'htmlTitle',
	    'author': 'authorName',
	    'url': 'url',
	    'datePublished': 'created',
	    'dateModified': 'updated',
	    'image': 'featuredImage',
	    'description': 'metaDescription',
	}

	# Request headers do not change between pages, so they are built once.
	headers = {'accept': 'application/json'}

	# Collects one DataFrame per fetched page.
	mainframe = []

	# NOTE(review): `tqdm` is not among the imports visible at the top of this
	# script -- confirm it is imported before this loop runs.
	# NOTE(review): paging starts at offset 100, so offset 0 is never requested;
	# confirm that skipping the first page is intentional.
	for pag in tqdm(range(100, 1000, 100)):  # max limit per call is 100, so we page in steps of 100.
	    querystring = {"hapikey": "hubspot_api_key",  # presumably a placeholder for the real key
	                   'offset': str(pag),
	                   'limit': '100'}

	    # A timeout keeps a stalled connection from hanging the whole script.
	    response = requests.get(url, headers=headers, params=querystring, timeout=30)
	    # Fail on an HTTP error status here rather than with a KeyError on
	    # 'results' a few lines below.
	    response.raise_for_status()
	    real_response = response.json()

	    # One flat record per post, limited to the fields listed in post_fields.
	    frame = [{column: post[key] for column, key in post_fields.items()}
	             for post in real_response['results']]

	    df = pd.DataFrame(frame)
	    mainframe.append(df)