Created
March 24, 2023 19:45
-
-
Save YannisDC/eae92eeb64c061532f863d5b26f6f2a6 to your computer and use it in GitHub Desktop.
Fetches reviews from Trustpilot, saves them to a CSV file, and plots the ratings over time.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def _text_or_nan(tag):
    """Return the stripped text of *tag*, or ``"NaN"`` when *tag* is ``None``."""
    return "NaN" if tag is None else tag.text.strip()


def _parse_review(review):
    """Extract title, content, rating and date from one review element.

    Any field whose tag or attribute is missing is reported as ``"NaN"`` so
    that a single malformed review does not abort the whole scrape.
    """
    # NOTE(review): these class names look auto-generated and presumably
    # change when the site's markup is rebuilt -- verify them if no reviews
    # are found.
    title = _text_or_nan(review.find("h2", class_="typography_heading-s__f7029"))
    content = _text_or_nan(review.find("p", class_="typography_body-l__KUYFJ"))

    # The rating is the second whitespace-separated word of the first image's
    # alt text (presumably of the form "Rated N out of 5 stars" -- confirm).
    image = review.find("img")
    alt_words = (image.get("alt") or "").split() if image is not None else []
    rating = alt_words[1] if len(alt_words) > 1 else "NaN"

    # Keep only the part of the timestamp before "T".
    time_tag = review.find("time")
    stamp = time_tag.get("datetime") if time_tag is not None else None
    date = stamp.split("T")[0] if stamp else "NaN"

    return {"title": title, "content": content, "rating": rating, "date": date}


def fetch_reviews(company_name, *, timeout=30):
    """Scrape the Trustpilot reviews of a company and save them to a CSV file.

    Starting from ``https://www.trustpilot.com/review/{company_name}``, each
    page is downloaded and parsed, and the "next page" link is followed until
    there is none left. The collected reviews are written to
    ``{company_name}_reviews.csv`` with the columns ``title``, ``content``,
    ``rating`` and ``date``; fields that cannot be found are stored as the
    string ``"NaN"``.

    If a page cannot be downloaded (network error, timeout or HTTP error
    status), pagination stops and the reviews collected so far are still
    saved.

    Args:
        company_name: Identifier of the company as used in the Trustpilot
            review URL and in the output file name.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        OSError: If the CSV file cannot be opened for writing.
    """
    base_url = "https://www.trustpilot.com"
    url = f"{base_url}/review/{company_name}"
    reviews_list = []
    visited = set()  # guards against a "next" link that points backwards

    while url and url not in visited:
        visited.add(url)

        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as exc:
            # Best effort: keep what has been collected instead of losing it.
            print(f"Stopped fetching at {url}: {exc}")
            break

        soup = BeautifulSoup(response.text, "html.parser")

        reviews = soup.find_all("section", class_="styles_reviewContentwrapper__zH_9M")
        reviews_list.extend(_parse_review(review) for review in reviews)

        # Follow the next-page link; an anchor without href ends pagination.
        next_page = soup.find("a", {"data-page-number": "next-page"})
        href = next_page.get("href") if next_page is not None else None
        url = urljoin(base_url, href) if href else None

    # Save reviews to CSV
    with open(f"{company_name}_reviews.csv", "w", newline="", encoding="utf-8") as csvfile:
        fieldnames = ["title", "content", "rating", "date"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(reviews_list)

    print(f"Successfully saved {len(reviews_list)} reviews for {company_name} to {company_name}_reviews.csv")
# Example run: scrape the reviews listed under this Trustpilot identifier.
company_name = "www.chronopost.fr"
fetch_reviews(company_name=company_name)
import pandas as pd | |
import matplotlib.pyplot as plt | |
def plot_reviews(csv_file):
    """Show a scatter of review ratings against review dates.

    Reads *csv_file*, which must provide a ``date`` column (parsed as dates)
    and a ``rating`` column (converted to numbers), and displays one marker
    per review with matplotlib.

    Args:
        csv_file: Path of the CSV file to read.
    """
    frame = pd.read_csv(csv_file, parse_dates=["date"])
    frame["rating"] = pd.to_numeric(frame["rating"])

    # Work on a date-ordered copy rather than sorting in place.
    ordered = frame.sort_values("date")

    # Markers only: an empty linestyle suppresses the connecting line.
    plt.plot(ordered["date"], ordered["rating"], marker="o", linestyle="")

    plt.title("Reviews Ratings Over Time")
    plt.xlabel("Date")
    plt.ylabel("Rating")

    plt.show()
# Plot the CSV file named after the company configured above.
csv_file = f"{company_name}_reviews.csv"
plot_reviews(csv_file=csv_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment