Last active
December 8, 2023 01:43
-
-
Save shawngraham/613e70878d5e0c6603824fea45bf02d9 to your computer and use it in GitHub Desktop.
A little tkinter app that gets headlines from NewsAPI and, if you want, will go to each URL, read the article, and format the output as knowledge-graph triples. But don't forget to export the results.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import tkinter as tk
from tkinter import filedialog
from tkinter import ttk, filedialog

import llm
import pandas as pd
import requests
from newsapi import NewsApiClient
from strip_tags import strip_tags
# Local model served through the llm-gpt4all plugin for `llm`.
model = llm.get_model("orca-mini-3b-gguf2-q4_0")
# Alternative: a hosted GPT model, for which you'll need an API key.
# model = llm.get_model("4t")
# model.key = 'model api key here'

# NewsAPI client. The key is read from the NEWSAPI_KEY environment variable so
# that a real key never has to be written into this file; when the variable is
# unset the original placeholder string is used.
newsapi = NewsApiClient(api_key=os.environ.get('NEWSAPI_KEY', 'api here'))

# Most recent search results as a DataFrame; stays None until get_news() runs.
articles_data = None
def llm_processing(content):
    """Ask the language model to extract knowledge-graph triples from text.

    Args:
        content: Text to analyse; it is embedded verbatim in the prompt.

    Returns:
        The model's reply as a string, or None if the model call failed.
    """
    prompt_text = (
        "You are a knowledge graph assistant. Extract entities and predicates "
        f"from the provided text:\n\n{content}\n\n"
        "Return ONLY the triples formatted for csv: entity1,predicate,entity2."
    )
    try:
        # model.prompt() hands back a lazily evaluated response object.
        # Calling .text() runs the model here, inside the try, so a failure
        # is caught below and the caller receives a plain string.
        return model.prompt(prompt_text).text()
    except Exception as e:
        # Best-effort boundary: one failed article must not abort the batch.
        print(f"An error occurred: {e}")
        return None
def get_news():
    """Fetch articles matching the query box and show them in the table.

    Reads the search text from ``query_entry``, requests English-language
    matches through ``newsapi``, stores them in the module-level
    ``articles_data`` DataFrame (columns: title, content, url, llm), displays
    that DataFrame and enables the process button.
    """
    global articles_data  # Rebound below, so it must be declared global

    query = query_entry.get().strip()
    if not query:
        # Nothing to search for; skip the network round-trip.
        print("Please enter a search term.")
        return

    all_stories = newsapi.get_everything(q=query, language='en')
    # `or []` also covers a response whose "articles" entry is present but null.
    articles = all_stories.get('articles') or []

    # dict.get(key, default) only falls back when the key is missing; using
    # `or` also replaces a key that is present with a null or empty value.
    rows = [
        {
            'title': article.get('title') or 'No Title Available',
            'content': article.get('content') or 'No Content Available',
            'url': article.get('url') or 'No Url Available',
            'llm': '',  # Filled in later by process_news_with_llm()
        }
        for article in articles
    ]

    # Explicit columns keep the table shape stable when no article matched.
    articles_data = pd.DataFrame(rows, columns=['title', 'content', 'url', 'llm'])

    show_dataframe(articles_data)
    process_button.config(state='normal')
def process_article_url(url, timeout=30):
    """Download an article, reduce its HTML to text and run it through the LLM.

    Args:
        url: Address of the article to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Whatever ``llm_processing`` returns for the cleaned text, or None
        when the page could not be retrieved.
    """
    try:
        # Without a timeout a stalled server would block this call, and with
        # it the whole GUI, indefinitely. Timeouts raise a RequestException.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Treat 4xx/5xx responses as failures
    except requests.RequestException as e:
        print(f"An error occurred while fetching the article: {e}")
        return None

    # NOTE(review): the second positional argument of strip_tags appears to be
    # a list of CSS selectors whose content is *kept* (here: text inside
    # <div> elements), not a list of tags to exclude - confirm against the
    # strip-tags documentation.
    html_content = strip_tags(
        response.text,
        ["div"],
        minify=True,  # Collapse extra spaces and new lines
        keep_tags=["h1"],  # Leave <h1> markup in the output
    )
    return llm_processing(html_content)
def process_news_with_llm():
    """Run every fetched article URL through the LLM and refresh the table.

    Works from the article URLs rather than the 'content' column. Does
    nothing when no articles have been fetched yet.
    """
    global articles_data

    if articles_data is None:
        return

    # One LLM result per row, computed from that row's URL.
    llm_results = articles_data['url'].apply(process_article_url)
    articles_data['llm'] = llm_results

    show_dataframe(articles_data)

    # Results exist now, so exporting makes sense: enable the button and
    # point it at the current DataFrame.
    export_button.config(
        state='normal',
        command=lambda: export_to_file(articles_data),
    )
def export_to_file(df):
    """Ask the user for a destination and write ``df`` there, pipe-delimited.

    Args:
        df: DataFrame to export; the index is not written.
    """
    save_options = {
        'defaultextension': ".csv",
        'filetypes': [("CSV files", "*.csv"), ("All files", "*.*")],
    }
    filename = filedialog.asksaveasfilename(**save_options)
    if not filename:
        # No path was chosen, so there is nothing to write.
        return
    df.to_csv(filename, sep='|', index=False)
# Main application window.
root = tk.Tk()
root.title('News API Interface')

# Search box for the news query.
query_entry = tk.Entry(root, width=50)
query_entry.pack()

# Button that fetches headlines for the current query.
get_news_button = tk.Button(root, text='Get Headlines', command=get_news)
get_news_button.pack()

# LLM processing button; get_news() enables it.
process_button = tk.Button(
    root,
    text='Process Articles with LLM',
    state=tk.DISABLED,
    command=process_news_with_llm,
)
process_button.pack()

# Export button; process_news_with_llm() enables it and attaches its command.
export_button = tk.Button(root, text='Export', state=tk.DISABLED)
export_button.pack()

# Hint shown at the top left of the window.
top_left_label = tk.Label(
    root,
    text="News will load fast, if there is any. LLM will process slowly.",
    anchor=tk.NW,
)
top_left_label.pack(anchor=tk.NW, pady=(5, 0), padx=(5, 0))
def update_treeview(df, treeview):
    """Replace the rows shown in ``treeview`` with the rows of ``df``.

    Args:
        df: DataFrame whose rows become the new table rows, in order.
        treeview: Widget to refresh; its existing items are removed first.
    """
    # Drop whatever is currently displayed.
    stale_items = treeview.get_children()
    treeview.delete(*stale_items)

    # Append one item per DataFrame row; the index label is not shown.
    for _, record in df.iterrows():
        cell_values = list(record)
        treeview.insert('', 'end', values=cell_values)
# Frame that holds the results table and its scrollbar side by side.
tree_frame = tk.Frame(root)
tree_frame.pack()

# Vertical scrollbar, packed first so it takes the right-hand edge.
tree_scroll = tk.Scrollbar(tree_frame)
tree_scroll.pack(side=tk.RIGHT, fill=tk.Y)

# Results table. show='headings' hides the implicit "#0" tree column, which
# would otherwise appear as a blank column because rows are inserted with
# `values` only.
tree = ttk.Treeview(
    tree_frame,
    columns=('Title', 'Content', 'URL', 'LLM Result'),
    show='headings',
    yscrollcommand=tree_scroll.set,
    selectmode='browse',
)

# Let the scrollbar drive the table's vertical view.
tree_scroll.config(command=tree.yview)

# Give every column the same width, left-aligned, with its name as heading.
for col in tree['columns']:
    tree.column(col, anchor='w', width=240)
    tree.heading(col, text=col, anchor='w')

# Nominal overall width; no widget in this file reads this value.
tree_width = 1200

# One pack call is enough: fill/expand let the table use the frame's width.
tree.pack(fill='x', expand=True)
def show_dataframe(df):
    """Display ``df`` in the module-level results table ``tree``."""
    update_treeview(df=df, treeview=tree)
# Start the Tk event loop only when executed as a script, so that importing
# this module does not block inside mainloop().
if __name__ == "__main__":
    root.mainloop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment