Created
February 10, 2021 17:27
-
-
Save mkmark/d537af5a59236af8a316c5b37e1bc8f7 to your computer and use it in GitHub Desktop.
wordpress-markdown-exporter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# User-editable settings for the WordPress -> Markdown export.

# Table prefix of the WordPress installation; defaults to 'wp_'.
table_prefix = 'wp_'
# Absolute path to wp-config.php (the database credentials are read from it).
wp_config_path = r'/home/www/default/wp-config.php'
# Directory the Markdown files are written to; make sure it exists.
export_path = r'/home/www/'
import html
import json
import re
from pathlib import Path

import mysql.connector
from wpconfigr import WpConfigFile
# Connect to the database using the credentials defined in wp-config.php.
wp_config = WpConfigFile(wp_config_path)
# NOTE(review): DB_HOST is passed through unchanged; if the site defines it as
# "host:port" or as a socket path it may need splitting first -- confirm.
con = mysql.connector.connect(
    host=wp_config.get('DB_HOST'),
    user=wp_config.get('DB_USER'),
    password=wp_config.get('DB_PASSWORD'),
    database=wp_config.get('DB_NAME'),
)

# Table names with the configured prefix applied.
wp_posts = table_prefix + 'posts'
wp_terms = table_prefix + 'terms'
wp_term_taxonomy = table_prefix + 'term_taxonomy'
wp_term_relationships = table_prefix + 'term_relationships'
wp_users = table_prefix + 'users'
# Posts info: load every row of type 'post' (whatever its status) except
# WordPress' 'Auto Draft' placeholders into ``postsd``, keyed by post ID.
# Each value is a dict mapping the selected column names to the row's values.
#
# The table name is spliced into the statement because SQL identifiers cannot
# be bound as query parameters; it comes from the local ``table_prefix``
# setting, not from external input.
cur = con.cursor()
cur.execute(
    "SELECT ID, post_author, post_date_gmt, post_content, post_title, "
    "post_content_filtered, post_type, post_password, post_status, "
    "comment_status "
    "FROM " + wp_posts + " "
    "WHERE post_type = 'post' "
    "AND post_title <> 'Auto Draft'"
)
# Column names as reported by the cursor, in SELECT order.
columns = tuple(d[0] for d in cur.description)
# row[0] is the ID column, which becomes the dictionary key.
postsd = {row[0]: dict(zip(columns, row)) for row in cur}
# Terms and author info: extend every post in ``postsd`` with
#   'categories' -- names of its terms whose taxonomy is 'category'
#   'tags'       -- names of its terms whose taxonomy is 'post_tag'
#   'author'     -- display_name of the user referenced by post_author
#                   ('' when no matching user row exists)
#
# Both statements are built once, outside the loop; only the ID changes per
# post and it is bound as a query parameter (%s) instead of being
# concatenated into the SQL text.
terms_query = (
    "SELECT " + wp_terms + ".name, " + wp_term_taxonomy + ".taxonomy "
    "FROM " + wp_posts + " "
    "LEFT OUTER JOIN " + wp_term_relationships + " "
    "ON " + wp_posts + ".ID = " + wp_term_relationships + ".object_id "
    "LEFT OUTER JOIN " + wp_term_taxonomy + " "
    "ON " + wp_term_relationships + ".term_taxonomy_id = "
    + wp_term_taxonomy + ".term_taxonomy_id "
    "LEFT OUTER JOIN " + wp_terms + " "
    "ON " + wp_term_taxonomy + ".term_id = " + wp_terms + ".term_id "
    "WHERE " + wp_posts + ".ID = %s"
)
author_query = (
    "SELECT display_name "
    "FROM " + wp_users + " "
    "WHERE ID = %s"
)

for ID in postsd:
    post = postsd[ID]

    cur.execute(terms_query, (ID,))
    # fetchall() drains the result set so the cursor can be reused below.
    term_rows = cur.fetchall()
    post['categories'] = [
        name for name, taxonomy in term_rows if taxonomy == 'category'
    ]
    post['tags'] = [
        name for name, taxonomy in term_rows if taxonomy == 'post_tag'
    ]

    cur.execute(author_query, (post['post_author'],))
    author_rows = cur.fetchall()
    # Always set the key so later code can rely on it, even when the user
    # row is missing.
    post['author'] = author_rows[-1][0] if author_rows else ''
# Characters replaced in file names: / \ " : * ? < > | and any whitespace.
_PATH_UNSAFE_CHARS = re.compile(r'[\\/":*?<>|\s]')


def make_title_path_valid(_str):
    """Return *_str* turned into a lower-case, file-name-safe slug.

    Every occurrence of ``/ \\ " : * ? < > |`` or a whitespace character is
    replaced by ``-`` (one dash per character, runs are not collapsed), then
    the result is lower-cased.
    """
    return _PATH_UNSAFE_CHARS.sub('-', _str).lower()
def make_title_md_valid(_str):
    """Return *_str* as a double-quoted scalar for the YAML front matter.

    Backslashes and double quotes inside the title are backslash-escaped so
    the surrounding quotes stay balanced and the value parses back to the
    original title.
    """
    # Escape backslashes first so the ones added for quotes are not doubled.
    escaped = _str.replace('\\', '\\\\').replace('"', '\\"')
    return '"' + escaped + '"'
# Export: write one Markdown file per post into ``export_path``. The file is
# named "<YYYY-MM-DD>-<slugified title>.md" and starts with a YAML front
# matter block followed by the post body.
#
# NOTE(review): post_date_gmt is assumed to be a datetime here; if the
# database holds a zero date for a post (e.g. a draft) the connector may hand
# back None and strftime() would fail -- confirm against real data.
for post in postsd.values():
    file_name = (
        post['post_date_gmt'].strftime("%Y-%m-%d-")
        + make_title_path_valid(post['post_title'])
        + '.md'
    )
    # Joining via Path works whether or not export_path ends with a slash.
    file_path = Path(export_path) / file_name

    # A post counts as published only if it is public and not
    # password-protected.
    is_published = (
        post['post_status'] == 'publish' and post['post_password'] == ''
    )
    front_matter = [
        '---',
        'layout: post',
        'title: ' + make_title_md_valid(post['post_title']),
        'date: ' + post['post_date_gmt'].strftime("%Y-%m-%d %H:%M"),
        # .get(): the key is absent when no user row matched the author ID.
        'author: ' + post.get('author', ''),
        'comments: ' + ('true' if post['comment_status'] == 'open' else 'false'),
        # A JSON array is a valid YAML flow sequence and, unlike str(list),
        # escapes quotes inside the names correctly.
        'categories: ' + json.dumps(post['categories'], ensure_ascii=False),
        'tags: ' + json.dumps(post['tags'], ensure_ascii=False),
        'published: ' + ('true' if is_published else 'false'),
        '---',
    ]

    # Use post_content_filtered when it is non-empty, otherwise fall back to
    # post_content.
    body = post['post_content_filtered'] or post['post_content']
    file_content = '\n'.join(front_matter) + '\n' + html.unescape(body) + '\n'

    # errors='ignore' is kept from the original: characters that cannot be
    # encoded are dropped rather than aborting the export.
    with open(file_path, 'w', encoding='utf-8', errors='ignore') as md_file:
        md_file.write(file_content)

cur.close()
con.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment