Skip to content

Commit

Permalink
facebook: try to pull out post types from HTML content
Browse files Browse the repository at this point in the history
  • Loading branch information
redshiftzero committed Feb 21, 2025
1 parent ab3cf81 commit 0b56d2f
Showing 1 changed file with 30 additions and 10 deletions.
40 changes: 30 additions & 10 deletions src/account_facebook/facebook_account_controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -512,14 +512,11 @@ export class FacebookAccountController {
const postElements = dom.window.document.querySelectorAll('._a6-g');

for (const postElement of postElements) {
// Status updates have exactly one text div inside _2pin
const contentDiv = postElement.querySelector('._2pin');
const directTextDiv = contentDiv?.querySelector(':scope > div');
const hasComplexStructure = directTextDiv?.querySelector('div');

if (hasComplexStructure) {
log.info("FacebookAccountController.importFacebookArchive: skipping post with complex nested structure");
continue; // Skip posts with complex nested structure (shared posts, groups, etc)
const postType = getPostType(postElement);

if (postType === 'shared_group') {
log.info("FacebookAccountController.importFacebookArchive: skipping group posts");
continue;
}

const titleElement = postElement.querySelector('._a6-h');
Expand All @@ -536,7 +533,7 @@ export class FacebookAccountController {
title: titleElement.textContent || '',
full_text: contentElement.textContent || '',
created_at: dateElement.textContent || '',
isReposted: false,
isReposted: postType === 'shared_post',
});
}
};
Expand Down Expand Up @@ -608,4 +605,27 @@ export class FacebookAccountController {
skipCount: skipCount,
};
}
}
}

/**
 * Classifies a post element from the Facebook archive HTML by looking at the
 * `._2pin` elements it contains.
 *
 * Rules, in order:
 * - exactly one `._2pin` descendant: `'status'`
 * - exactly two `._2pin` descendants:
 *   - the first one has non-blank text: `'shared_group'`
 *   - otherwise, the first one has a descendant matching `div div div div`: `'shared_post'`
 * - anything else: `'other'`
 *
 * @param element - The post container element to inspect.
 * @returns The detected post type.
 */
const getPostType = (element: Element): 'status' | 'shared_post' | 'shared_group' | 'other' => {
    const pinDivs = element.querySelectorAll('._2pin');

    if (pinDivs.length === 1) {
        return 'status';
    }

    if (pinDivs.length === 2) {
        const firstPin = pinDivs[0];

        // Group posts carry visible text (presumably the group name) in the first pin.
        // `textContent` never contains markup, so the text itself must not be searched
        // for the substring "div": doing so misclassified any group whose name merely
        // contains those letters (e.g. "Scuba diving club").
        const firstPinText = firstPin.textContent?.trim();
        if (firstPinText) {
            return 'shared_group';
        }

        // With no text in the first pin, a deeply nested div chain is treated as the
        // marker of a shared post. Note: the selector only tests for nesting; it does
        // not verify that the matched divs are empty.
        const nestedDivs = firstPin.querySelectorAll('div div div div');
        if (nestedDivs.length > 0) {
            return 'shared_post';
        }
    }

    return 'other';
};

0 comments on commit 0b56d2f

Please sign in to comment.