|
import { browser, $ } from "@wdio/globals"; |
|
import * as fs from "fs"; |
|
|
|
/**
 * CSS selectors used to locate elements on the inbox page.
 *
 * Declared `as const` so the table is read-only and each entry keeps its
 * literal string type; a typo in a key is then a compile-time error.
 */
const SELECTORS = {
  /** One entry in the conversation list. */
  conversationItem: ".conversation-list-item",
  /** Element inside a conversation entry whose text is read as the person's name. */
  conversationPersonName: ".artdeco-entity-lockup__title",
  /** List whose text is exported as the messages of the opened conversation. */
  messageContainer: ".message-container-width ul",
  /** Control matched by its Dutch aria-label ("Oudere berichten laden" = "Load older messages"). */
  messageOlderMessages: '[aria-label="Oudere berichten laden"]',
  /** Element in the opened conversation whose HTML is checked for the person's name. */
  messagePerson: '[data-anonymize="person"]',
} as const;
|
/** 1-based row of the first conversation to process (inclusive). */
const startMessage = 1;

/** 1-based row of the last conversation to process (inclusive); capped at the number of loaded conversations. */
const endMessage = 1_000;

/** CSV file that collected rows are appended to. */
const filePath = "messages.csv";

/**
 * Number of conversations written so far during this run.
 * Despite the name, it is incremented once per conversation, not once per message.
 */
let messageCount = 0;
|
|
|
/**
 * Opens the archived Sales Navigator inbox, walks the conversation list from
 * row `startMessage` to row `endMessage`, and appends one CSV row per
 * conversation (person name + full message text) to `filePath`.
 */
describe("Collect messages from LinkedIn Sales Navigator", () => {
  it("should scrape and store the archived messages in a CSV file", async () => {
    // Write the header only when the file does not exist yet, so repeated
    // runs keep appending to the same CSV instead of overwriting it.
    if (!fs.existsSync(filePath)) {
      fs.writeFileSync(filePath, "Person Name,Message\n", "utf8");
    }

    await browser.url(
      `https://www.linkedin.com/sales/inbox/2-ZmFhNDIxMDEtMDUyZi00YmNhLWJkNDMtZGY0ODA2Mzg3ODFmXzAxMg==?filter=ARCHIVED`
    );

    // Block until at least one conversation entry is present in the list.
    // The selector comes from SELECTORS so it cannot drift from the one used
    // by getConversationItems().
    await browser.waitUntil(
      async () =>
        ((await $$(SELECTORS.conversationItem)).length as unknown as number) > 0
    );

    const conversationItems = await getConversationItems();

    // The list does not change while iterating, so the upper bound is
    // computed once instead of on every loop check.
    const endIndex = Math.min(endMessage, conversationItems.length);

    for (let i = startMessage - 1; i < endIndex; i++) {
      const conversationItem = conversationItems[i];
      const personName = await conversationItem
        .$(SELECTORS.conversationPersonName)
        .getText();
      console.log(`Collecting the messages of ${personName} (Row ${i + 1})`);
      await conversationItem.$("a").click();

      // Wait for the conversation to load: the person's name must show up in
      // the opened conversation. While it does not, the helper keeps
      // scrolling up as long as the "load older messages" control exists.
      await waitForAllMessagesLoaded(personName);

      const messages = await $(SELECTORS.messageContainer).getText();
      const csvRow = formatCsvRow(personName, messages);
      writeCsvRow(csvRow, filePath);

      messageCount++;

      // Pause for two minutes after every 100 processed conversations.
      if (messageCount % 100 === 0) {
        console.log(
          `Processed ${messageCount} messages. Taking a 2-minute break...`
        );
        await browser.pause(2 * 60 * 1000);
      }
    }
  });
});
|
|
|
/**
 * Collects the conversation entries of the inbox list.
 *
 * Counts the entries currently matched by `SELECTORS.conversationItem` and
 * hands that count to `scrollToLoadMoreConversations`, whose result is
 * returned.
 *
 * @returns The conversation elements found after scrolling has stopped
 *   yielding new entries.
 */
async function getConversationItems(): Promise<WebdriverIO.ElementArray> {
  const visibleItems = await $$(SELECTORS.conversationItem);
  const visibleCount = visibleItems.length as unknown as number;
  return scrollToLoadMoreConversations(visibleCount);
}
|
|
|
/**
 * Scrolls the conversation list until a scroll step no longer increases the
 * number of conversation entries.
 *
 * Each step scrolls the list container down by 2000px, pauses for one second,
 * and re-queries the entries.
 *
 * @param initialCount Number of conversation entries known before the first scroll.
 * @returns The entries found by the last query, i.e. the first one whose count
 *   did not exceed the previous count.
 */
async function scrollToLoadMoreConversations(
  initialCount: number
): Promise<ChainablePromiseArray> {
  let knownCount = initialCount;

  for (;;) {
    await browser.execute(() => {
      const listContainer = document.querySelector(
        ".overflow-y-auto.overflow-hidden.flex-grow-1"
      );
      listContainer?.scrollBy(0, 2000);
    });

    // Pause before counting again so newly loaded entries can appear.
    await browser.pause(1000);

    const currentItems = await $$(SELECTORS.conversationItem);
    const currentCount = currentItems.length as unknown as number;

    // Stop as soon as a scroll step brings in nothing new.
    if (!(currentCount > knownCount)) {
      return currentItems;
    }

    knownCount = currentCount;
  }
}
|
|
|
/**
 * Waits until the opened conversation shows the given person's name.
 *
 * While the name is not found and the "load older messages" control exists,
 * the message container is scrolled into view and the check is repeated.
 * If the name is not found and that control is absent, an error is logged and
 * the function returns without throwing.
 *
 * NOTE(review): there is no cap on the number of iterations; the loop only
 * ends once the name is found or the control disappears.
 *
 * @param personName Name expected to appear in the opened conversation.
 */
async function waitForAllMessagesLoaded(personName: string): Promise<void> {
  for (;;) {
    if (await checkForPersonNameInMessagesHeader(personName)) {
      return;
    }

    const canLoadOlder = await $(SELECTORS.messageOlderMessages).isExisting();
    if (!canLoadOlder) {
      console.error(
        `Error: Person name "${personName}" not found in the conversation.`
      );
      return;
    }

    await $(SELECTORS.messageContainer).scrollIntoView();
    try {
      await $(SELECTORS.messageOlderMessages).waitForExist({ timeout: 1000 });
    } catch {
      // Best effort: a timeout here is ignored and the name check runs again.
    }
  }
}
|
|
|
/**
 * Checks whether the person's name shows up in the element matched by
 * `SELECTORS.messagePerson`.
 *
 * Polls for up to three seconds. Any error raised while waiting, including
 * the timeout, is treated as "not found".
 *
 * @param personName Name to look for in the element's HTML.
 * @returns `true` if the name appeared within the timeout, otherwise `false`.
 */
async function checkForPersonNameInMessagesHeader(
  personName: string
): Promise<boolean> {
  const headerContainsName = async (): Promise<boolean> => {
    const headerHtml = await $(SELECTORS.messagePerson).getHTML();
    return headerHtml.includes(personName);
  };

  try {
    await browser.waitUntil(headerContainsName, {
      timeout: 3000,
      timeoutMsg: `Person name "${personName}" not found within the given time.`,
    });
  } catch {
    return false;
  }

  return true;
}
|
|
|
/**
 * Builds one CSV line with two quoted fields: the person's name and the
 * message text.
 *
 * Both fields are wrapped in double quotes and every embedded double quote is
 * doubled, so names such as `Dwayne "The Rock" Johnson` and multi-line
 * message text stay inside their own column.
 *
 * @param personName Value for the "Person Name" column.
 * @param messages Value for the "Message" column; may contain line breaks.
 * @returns The CSV row, terminated by a newline.
 */
function formatCsvRow(personName: string, messages: string): string {
  // Inside a quoted CSV field a literal quote is written as two quotes.
  const quoteField = (value: string): string =>
    `"${value.replace(/"/g, '""')}"`;

  return `${quoteField(personName)},${quoteField(messages)}\n`;
}
|
|
|
/**
 * Appends an already formatted CSV row to a file, synchronously, as UTF-8.
 *
 * @param csvRow Text to append; written exactly as given.
 * @param filePath Path of the file to append to.
 */
function writeCsvRow(csvRow: string, filePath: string): void {
  fs.appendFileSync(filePath, csvRow, { encoding: "utf8" });
}