Last active
March 15, 2023 01:49
-
-
Save alexdanilowicz/56c7124d7b55ce76a3706bd5d96add01 to your computer and use it in GitHub Desktop.
sql queries to read chat.db file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- author: alex.danilowicz@gmail.com | |
-- SQlite Queries to Read chat.db file | |
-- top 5 sent / receieved words, just pass is_from_me as param | |
WITH RECURSIVE split_text(guid, text, etc) AS | |
( | |
SELECT | |
m.guid, '', m.text || ' ' | |
FROM message m | |
-- make sure the text isn't blank | |
WHERE m.text IS NOT NULL | |
-- and it's a sent message | |
AND m.is_from_me = 1 | |
UNION ALL | |
SELECT | |
guid, SUBSTR(etc, 0, INSTR(etc, ' ')), SUBSTR(etc, INSTR(etc, ' ')+1) | |
FROM split_text | |
WHERE etc <> '' | |
) | |
SELECT | |
text, COUNT(text) as count | |
FROM split_text | |
-- stop words | |
WHERE TRIM(LOWER(text)) NOT IN ("", " ", "u", "would", "like", "im", "ur", "get", "yeah", "cause", "i","me","my","myself","we","our","ours","ourselves","you","your","yours","yourself","yourselves","he","him","his","himself","she","her","hers","herself","it","its","itself","they","them","their","theirs","themselves","what","which","who","whom","this","that","these","those","am","is","are","was","were","be","been","being","have","has","had","having","do","does","did","doing","a","an","the","and","but","if","or","because","as","until","while","of","at","by","for","with","about","against","between","into","through","during","before","after","above","below","to","from","up","down","in","out","on","off","over","under","again","further","then","once","here","there","when","where","why","how","all","any","both","each","few","more","most","other","some","such","no","nor","not","only","own","same","so","than","too","very","s","t","can","will","just","don","should","now") | |
-- object replacement character | |
AND unicode(TRIM(LOWER(text))) != 65532 | |
GROUP BY text | |
ORDER BY count DESC | |
LIMIT 5 | |
-- total number of texts, sent v. received, excludes group chats | |
SELECT COUNT(*) as count, | |
is_from_me AS sent | |
FROM message | |
WHERE message.cache_roomnames IS NULL | |
GROUP BY is_from_me | |
-- top texted friends, excludes group chats | |
SELECT COUNT(*) as count, h.id | |
FROM message JOIN handle h ON h.ROWID = handle_id | |
WHERE message.cache_roomnames IS NULL | |
GROUP BY h.id | |
ORDER BY count DESC | |
------- DATE QUERIES ------ | |
WITH DATE_TIME_TABLE AS | |
(SELECT | |
guid as dt_guid, | |
CASE | |
WHEN msg.date > 10000000000 | |
THEN | |
substr(datetime((msg.date/1000000000) + strftime('%s','2001-01-01 01:01:01'), 'unixepoch', 'localtime'), 0, 20) | |
ELSE | |
substr(datetime(msg.date + strftime('%s','2001-01-01 01:01:01'), 'unixepoch', 'localtime'), 0, 20) | |
END AS dt_date | |
FROM message msg) | |
-- texts per hour, probs makes sense to exclude group chats | |
SELECT strftime('%H', dt_date) as hour, is_from_me AS sent, COUNT(guid) as count | |
FROM DATE_TIME_TABLE | |
JOIN message | |
ON dt_guid = guid | |
WHERE message.cache_roomnames IS NULL | |
GROUP BY strftime('%H', dt_date), is_from_me | |
ORDER BY count DESC | |
-- texts per day, probs makes sense to exclude group chats | |
SELECT DATE(dt_date) as day, is_from_me AS sent, COUNT(guid) as count | |
FROM DATE_TIME_TABLE | |
JOIN message | |
ON dt_guid = guid | |
WHERE message.cache_roomnames IS NULL | |
GROUP BY DATE(dt_date), is_from_me | |
ORDER BY count DESC | |
------------------------------------------------- | |
-- old main query | |
SELECT | |
CASE | |
WHEN a.date > 10000000000 | |
THEN | |
substr(datetime((a.date/1000000000) + strftime('%s','2001-01-01 01:01:01'), 'unixepoch'), 0, 20) | |
ELSE | |
substr(datetime(a.date + strftime('%s','2001-01-01 01:01:01'), 'unixepoch'), 0, 20) | |
END AS date, | |
b.id, | |
lower(a.text) AS text, | |
a.is_from_me AS sent, | |
a.cache_roomnames AS group_chat | |
FROM message a | |
JOIN handle b ON b.ROWID = a.handle_id | |
WHERE | |
a.text IS NOT NULL | |
AND length(b.id) > 7 | |
AND group_chat is NULL; | |
------- CONTACT QUERIES ------ | |
-- CREATE TABLE CONTACTS_TABLE AS | |
WITH CONTACTS_CLEAN_TABLE AS ( | |
SELECT | |
ZABCDRECORD.Z_PK, | |
ZABCDRECORD.ZFIRSTNAME, | |
ZABCDRECORD.ZLASTNAME, | |
-- TODO: I understand this is ugly... use regex. | |
replace(replace(replace(replace(replace(replace(ZABCDPHONENUMBER.ZFULLNUMBER, "(", ""), ")", ""), "_", ""), "-", ""), " ", ""), "+", "") as ZFULLNUMBER | |
FROM ZABCDRECORD | |
LEFT JOIN ZABCDPHONENUMBER ON ZABCDPHONENUMBER.ZOWNER = ZABCDRECORD.Z_PK | |
WHERE ZABCDPHONENUMBER.ZFULLNUMBER IS NOT NULL | |
) | |
--- NOTE: in the chat.db the format is +1619... in other words | |
--- chat.db lists the country code always. The addressbook.db, however, does not always include the country code. | |
--- Therefore, we remove the + and the country code, and FILTER on the 10-digit length. | |
--- All phone numbers in the US/Canada are 10-digit... this assumption may not work as well for users in other countries | |
SELECT | |
ZFIRSTNAME || " " || ZLASTNAME as contact_name, | |
CASE WHEN LENGTH(ZFULLNUMBER) > 10 | |
THEN SUBSTR(ZFULLNUMBER, -10, 10) | |
ELSE ZFULLNUMBER | |
END AS contact_phone | |
FROM CONTACTS_CLEAN_TABLE | |
WITH GROUP_CHAT_NAMES AS (select | |
group_concat(distinct coalesced_contact_name) as participants, | |
display_name, | |
cmj.chat_id | |
from | |
chat c | |
join chat_message_join cmj on cmj.chat_id = c."ROWID" | |
join core_main_table m on m. "ROWID" = cmj.message_id | |
group by | |
c."ROWID" | |
having | |
count(distinct coalesced_contact_name) > 1), | |
-- texts in a group chat | |
CORE_GROUP_CHAT_TABLE AS ( | |
SELECT | |
text, | |
display_name, | |
human_readable_date, | |
coalesce(coalesced_contact_name, "you") as contact_name, | |
participants, is_from_me, | |
associated_message_type, | |
CASE WHEN display_name = "" THEN participants ELSE display_name END as group_chat_name | |
FROM core_main_table cm | |
JOIN GROUP_CHAT_NAMES gcm | |
on cm.chat_id = gcm.chat_id) | |
-- who gives reactions | |
SELECT * FROM CORE_GROUP_CHAT_TABLE WHERE associated_message_type != 0; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment