Skip to content

Instantly share code, notes, and snippets.

@alexdanilowicz
Last active March 15, 2023 01:49
Show Gist options
  • Save alexdanilowicz/56c7124d7b55ce76a3706bd5d96add01 to your computer and use it in GitHub Desktop.
Save alexdanilowicz/56c7124d7b55ce76a3706bd5d96add01 to your computer and use it in GitHub Desktop.
sql queries to read chat.db file
-- author: alex.danilowicz@gmail.com
-- SQlite Queries to Read chat.db file
-- top 5 sent / receieved words, just pass is_from_me as param
WITH RECURSIVE split_text(guid, text, etc) AS
(
SELECT
m.guid, '', m.text || ' '
FROM message m
-- make sure the text isn't blank
WHERE m.text IS NOT NULL
-- and it's a sent message
AND m.is_from_me = 1
UNION ALL
SELECT
guid, SUBSTR(etc, 0, INSTR(etc, ' ')), SUBSTR(etc, INSTR(etc, ' ')+1)
FROM split_text
WHERE etc <> ''
)
SELECT
text, COUNT(text) as count
FROM split_text
-- stop words
WHERE TRIM(LOWER(text)) NOT IN ("", " ", "u", "would", "like", "im", "ur", "get", "yeah", "cause", "i","me","my","myself","we","our","ours","ourselves","you","your","yours","yourself","yourselves","he","him","his","himself","she","her","hers","herself","it","its","itself","they","them","their","theirs","themselves","what","which","who","whom","this","that","these","those","am","is","are","was","were","be","been","being","have","has","had","having","do","does","did","doing","a","an","the","and","but","if","or","because","as","until","while","of","at","by","for","with","about","against","between","into","through","during","before","after","above","below","to","from","up","down","in","out","on","off","over","under","again","further","then","once","here","there","when","where","why","how","all","any","both","each","few","more","most","other","some","such","no","nor","not","only","own","same","so","than","too","very","s","t","can","will","just","don","should","now")
-- object replacement character
AND unicode(TRIM(LOWER(text))) != 65532
GROUP BY text
ORDER BY count DESC
LIMIT 5
-- total number of texts, sent v. received, excludes group chats
SELECT COUNT(*) as count,
is_from_me AS sent
FROM message
WHERE message.cache_roomnames IS NULL
GROUP BY is_from_me
-- top texted friends, excludes group chats
SELECT COUNT(*) as count, h.id
FROM message JOIN handle h ON h.ROWID = handle_id
WHERE message.cache_roomnames IS NULL
GROUP BY h.id
ORDER BY count DESC
------- DATE QUERIES ------
WITH DATE_TIME_TABLE AS
(SELECT
guid as dt_guid,
CASE
WHEN msg.date > 10000000000
THEN
substr(datetime((msg.date/1000000000) + strftime('%s','2001-01-01 01:01:01'), 'unixepoch', 'localtime'), 0, 20)
ELSE
substr(datetime(msg.date + strftime('%s','2001-01-01 01:01:01'), 'unixepoch', 'localtime'), 0, 20)
END AS dt_date
FROM message msg)
-- texts per hour, probs makes sense to exclude group chats
SELECT strftime('%H', dt_date) as hour, is_from_me AS sent, COUNT(guid) as count
FROM DATE_TIME_TABLE
JOIN message
ON dt_guid = guid
WHERE message.cache_roomnames IS NULL
GROUP BY strftime('%H', dt_date), is_from_me
ORDER BY count DESC
-- texts per day, probs makes sense to exclude group chats
SELECT DATE(dt_date) as day, is_from_me AS sent, COUNT(guid) as count
FROM DATE_TIME_TABLE
JOIN message
ON dt_guid = guid
WHERE message.cache_roomnames IS NULL
GROUP BY DATE(dt_date), is_from_me
ORDER BY count DESC
-------------------------------------------------
-- old main query
SELECT
CASE
WHEN a.date > 10000000000
THEN
substr(datetime((a.date/1000000000) + strftime('%s','2001-01-01 01:01:01'), 'unixepoch'), 0, 20)
ELSE
substr(datetime(a.date + strftime('%s','2001-01-01 01:01:01'), 'unixepoch'), 0, 20)
END AS date,
b.id,
lower(a.text) AS text,
a.is_from_me AS sent,
a.cache_roomnames AS group_chat
FROM message a
JOIN handle b ON b.ROWID = a.handle_id
WHERE
a.text IS NOT NULL
AND length(b.id) > 7
AND group_chat is NULL;
------- CONTACT QUERIES ------
-- CREATE TABLE CONTACTS_TABLE AS
WITH CONTACTS_CLEAN_TABLE AS (
SELECT
ZABCDRECORD.Z_PK,
ZABCDRECORD.ZFIRSTNAME,
ZABCDRECORD.ZLASTNAME,
-- TODO: I understand this is ugly... use regex.
replace(replace(replace(replace(replace(replace(ZABCDPHONENUMBER.ZFULLNUMBER, "(", ""), ")", ""), "_", ""), "-", ""), " ", ""), "+", "") as ZFULLNUMBER
FROM ZABCDRECORD
LEFT JOIN ZABCDPHONENUMBER ON ZABCDPHONENUMBER.ZOWNER = ZABCDRECORD.Z_PK
WHERE ZABCDPHONENUMBER.ZFULLNUMBER IS NOT NULL
)
--- NOTE: in the chat.db the format is +1619... in other words
--- chat.db lists the country code always. The addressbook.db, however, does not always include the country code.
--- Therefore, we remove the + and the country code, and FILTER on the 10-digit length.
--- All phone numbers in the US/Canada are 10-digit... this assumption may not work as well for users in other countries
SELECT
ZFIRSTNAME || " " || ZLASTNAME as contact_name,
CASE WHEN LENGTH(ZFULLNUMBER) > 10
THEN SUBSTR(ZFULLNUMBER, -10, 10)
ELSE ZFULLNUMBER
END AS contact_phone
FROM CONTACTS_CLEAN_TABLE
WITH GROUP_CHAT_NAMES AS (select
group_concat(distinct coalesced_contact_name) as participants,
display_name,
cmj.chat_id
from
chat c
join chat_message_join cmj on cmj.chat_id = c."ROWID"
join core_main_table m on m. "ROWID" = cmj.message_id
group by
c."ROWID"
having
count(distinct coalesced_contact_name) > 1),
-- texts in a group chat
CORE_GROUP_CHAT_TABLE AS (
SELECT
text,
display_name,
human_readable_date,
coalesce(coalesced_contact_name, "you") as contact_name,
participants, is_from_me,
associated_message_type,
CASE WHEN display_name = "" THEN participants ELSE display_name END as group_chat_name
FROM core_main_table cm
JOIN GROUP_CHAT_NAMES gcm
on cm.chat_id = gcm.chat_id)
-- who gives reactions
SELECT * FROM CORE_GROUP_CHAT_TABLE WHERE associated_message_type != 0;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment