CodeSigils · January 31, 2022 12:56
diff --git a/data-scrap01.py b/data-scrap01.py
 #!/usr/bin/env python
 # coding: utf-8

 # #### Step 1 - Imports

 # In[4]:


 import requests
 import pandas as pd
 import xlsxwriter


 # #### Step 2 - Requests & CURL

 # - The request was copied from the browser dev tools via the right-click "Copy as cURL (bash)" menu and converted to Python with the https://curlconverter.com/#python online tool.

 # In[11]:


 # Request headers reproduced from a browser request that was exported as a
 # curl command and converted with https://curlconverter.com/#python.
 # NOTE(review): the '^\\^' sequences in the two sec-ch-ua values look like
 # Windows cmd escaping left over from the copied command rather than real
 # header content - confirm and replace with the browser's actual values.
 headers = {
     'sec-ch-ua': '^\\^Chromium^\\^;v=^\\^94^\\^, ^\\^Google',
     'Referer': 'https://www.ebooks.com/en-us/subjects/computers/',
     'sec-ch-ua-mobile': '?0',
     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4590.0 Safari/537.36',
     'sec-ch-ua-platform': '^\\^Windows^\\^',
     'Content-Type': 'application/json',
 }

 # Query-string parameters, kept as an ordered tuple of (name, value) pairs
 # because a later cell indexes into it (params[2][1]).
 params = (
     ('subjectId', '13'),
     ('pageNumber', '1'),
     ('countryCode', 'US'),
 )

 # Always pass a timeout: without one, requests waits indefinitely when the
 # server never answers. Exceeding it raises requests.exceptions.Timeout.
 response = requests.get(
     'https://www.ebooks.com/api/search/subject/',
     headers=headers,
     params=params,
     timeout=30,
 )


 # #### Step 3 - Check Status Code

 # In[7]:


 # In a notebook, evaluating the bare name echoes the Response object.
 # Run as a plain script, this expression statement produces no output.
 response


 # In[47]:


 # Value of the third (name, value) pair in `params`.
 params[2][1]


 # #### Step 4 - Create Json Object

 # In[9]:


 # Decode the response body as JSON; the result is displayed, not stored.
 response.json()


 # In[7]:


 # Inspect the Python type of the decoded payload.
 type(response.json())


 # #### Step 5 - Output Keys

 # In[8]:


 # List the top-level keys of the decoded payload.
 response.json().keys()


 # #### Step 6 - Find your Data

 # - Define data points and then access everything in 'books' key:

 # In[15]:


 # Data points to look up for each book:
 # title
 # subtitle
 # author
 # publisher
 # publication year
 # price


 # In[9]:


 # Display the value stored under the 'books' key.
 response.json()['books']


 # - Find the number of entries under 'books':

 # In[10]:


 # Keep the 'books' value; every later lookup goes through this name.
 results_json = response.json()['books']


 # In[11]:


 len(results_json)


 # - Get the Results for the first item:

 # In[12]:


 results_json[0]


 # In[13]:


 # thumbnail (stored under the 'image_url' key)
 results_json[0]['image_url']


 # In[14]:


 # title
 results_json[0]['title']


 # In[15]:


 # subtitle
 results_json[0]['subtitle']


 # In[38]:


 # author (name of the first entry in 'authors' only)
 results_json[0]['authors'][0]['author_name']


 # In[16]:


 # publisher
 results_json[0]['publisher']


 # In[17]:


 # publication year
 results_json[0]['publication_year']


 # In[18]:


 # price
 results_json[0]['price']


 # #### Step 7 - Put everything together - Build one list per data point from the results

 # In[19]:


 # Each list holds one value per entry of results_json, in the same order,
 # so index i of every list describes the same book.
 title = [book['title'] for book in results_json]
 subtitle = [book['subtitle'] for book in results_json]
 # Only the first listed author of each book is kept.
 author = [book['authors'][0]['author_name'] for book in results_json]
 publisher = [book['publisher'] for book in results_json]
 publication_year = [book['publication_year'] for book in results_json]
 price = [book['price'] for book in results_json]


 # In[48]:


 # Sanity check: show the collected authors and titles together.
 author, '----', title


 # #### Step 8 - Pandas Dataframe

 # In[21]:


 # Map every spreadsheet column heading to its list of per-book values.
 book_columns = {
     'Title': title,
     'Subtitle': subtitle,
     'Author': author,
     'Publisher': publisher,
     'Publication Year': publication_year,
     'Price': price,
 }
 books_df = pd.DataFrame(book_columns)


 # In[22]:


 # Display the assembled table.
 books_df


 # #### Step 9 - Store results in Excel

 # In[23]:


 # Write the table to books.xlsx, leaving out the DataFrame index column.
 books_df.to_excel('books.xlsx', index=False)
	#!/usr/bin/env python
	# coding: utf-8

	# #### Step 1 - Imports

	# In[4]:


	import requests
	import pandas as pd
	import xlsxwriter


	# #### Step 2 - Requests & CURL

	# - The request was copied from the browser dev tools via the right-click "Copy as cURL (bash)" menu and converted to Python with the https://curlconverter.com/#python online tool.

	# In[11]:


	# Request headers reproduced from a browser request that was exported as a
	# curl command and converted with https://curlconverter.com/#python.
	# NOTE(review): the '^\\^' sequences in the two sec-ch-ua values look like
	# Windows cmd escaping left over from the copied command rather than real
	# header content - confirm and replace with the browser's actual values.
	headers = {
	    'sec-ch-ua': '^\\^Chromium^\\^;v=^\\^94^\\^, ^\\^Google',
	    'Referer': 'https://www.ebooks.com/en-us/subjects/computers/',
	    'sec-ch-ua-mobile': '?0',
	    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4590.0 Safari/537.36',
	    'sec-ch-ua-platform': '^\\^Windows^\\^',
	    'Content-Type': 'application/json',
	}

	# Query-string parameters, kept as an ordered tuple of (name, value) pairs
	# because a later cell indexes into it (params[2][1]).
	params = (
	    ('subjectId', '13'),
	    ('pageNumber', '1'),
	    ('countryCode', 'US'),
	)

	# Always pass a timeout: without one, requests waits indefinitely when the
	# server never answers. Exceeding it raises requests.exceptions.Timeout.
	response = requests.get(
	    'https://www.ebooks.com/api/search/subject/',
	    headers=headers,
	    params=params,
	    timeout=30,
	)


	# #### Step 3 - Check Status Code

	# In[7]:


	# In a notebook, evaluating the bare name echoes the Response object.
	# Run as a plain script, this expression statement produces no output.
	response


	# In[47]:


	# Value of the third (name, value) pair in `params`.
	params[2][1]


	# #### Step 4 - Create Json Object

	# In[9]:


	# Decode the response body as JSON; the result is displayed, not stored.
	response.json()


	# In[7]:


	# Inspect the Python type of the decoded payload.
	type(response.json())


	# #### Step 5 - Output Keys

	# In[8]:


	# List the top-level keys of the decoded payload.
	response.json().keys()


	# #### Step 6 - Find your Data

	# - Define data points and then access everything in 'books' key:

	# In[15]:


	# Data points to look up for each book:
	# title
	# subtitle
	# author
	# publisher
	# publication year
	# price


	# In[9]:


	# Display the value stored under the 'books' key.
	response.json()['books']


	# - Find the number of entries under 'books':

	# In[10]:


	# Keep the 'books' value; every later lookup goes through this name.
	results_json = response.json()['books']


	# In[11]:


	len(results_json)


	# - Get the Results for the first item:

	# In[12]:


	results_json[0]


	# In[13]:


	# thumbnail (stored under the 'image_url' key)
	results_json[0]['image_url']


	# In[14]:


	# title
	results_json[0]['title']


	# In[15]:


	# subtitle
	results_json[0]['subtitle']


	# In[38]:


	# author (name of the first entry in 'authors' only)
	results_json[0]['authors'][0]['author_name']


	# In[16]:


	# publisher
	results_json[0]['publisher']


	# In[17]:


	# publication year
	results_json[0]['publication_year']


	# In[18]:


	# price
	results_json[0]['price']


	# #### Step 7 - Put everything together - Loop through results and append data inside a list

	# In[19]:


	# One list per data point; index i of every list describes the same book.
	title = []
	subtitle = []
	author = []
	publisher = []
	publication_year = []
	price = []

	# The loop body must be indented under the `for`; with the statements at
	# the same level as the `for` line, Python rejects the loop as having no
	# body (IndentationError).
	for result in results_json:
	    # title
	    title.append(result['title'])

	    # subtitle
	    subtitle.append(result['subtitle'])

	    # author (only the first listed author is kept)
	    author.append(result['authors'][0]['author_name'])

	    # publisher
	    publisher.append(result['publisher'])

	    # publication_year
	    publication_year.append(result['publication_year'])

	    # price
	    price.append(result['price'])


	# In[48]:


	# double check: show the collected authors and titles together
	author, '----', title


	# #### Step 8 - Pandas Dataframe

	# In[21]:


	# Map every spreadsheet column heading to its list of per-book values.
	book_columns = {
	    'Title': title,
	    'Subtitle': subtitle,
	    'Author': author,
	    'Publisher': publisher,
	    'Publication Year': publication_year,
	    'Price': price,
	}
	books_df = pd.DataFrame(book_columns)


	# In[22]:


	# Display the assembled table.
	books_df


	# #### Step 9 - Store results in Excel

	# In[23]:


	# Write the table to books.xlsx, leaving out the DataFrame index column.
	books_df.to_excel('books.xlsx', index=False)