fix(core): Extract all types of links (#77)

Previously, only links formatted with Markdown were extracted. Now all links (except links to images) are extracted. Partial fix for #74.
jakipatryk · Sep 1, 2018 · 7e9c823 · 7e9c823
1 parent d2454d0
commit 7e9c823
Show file tree

Hide file tree

Showing 2 changed files with 25 additions and 16 deletions.
diff --git a/src/core/steemize/withJsonMetadata.spec.ts b/src/core/steemize/withJsonMetadata.spec.ts
@@ -216,8 +216,9 @@ fdescribe('#core #steemize (json_metadata) withImage', () => {
 });
 
 fdescribe('#core #steemize (json_metadata) getLinks', () => {
-  it('should extract and return an array of links if there are any correctly formatted with Markdown', () => {
+  it('should extract and return an array of unique links if there are any', () => {
     const body = `[Markdown is the best!](https://steeditor.app/)
+    https://utopian.io
     <p>test</p> (https://steeditor.app/test/)
     [NICE LINK](https://steeditor.app/drafts/) this weird mixin is still considered a Markdown tho :(
     [NICE LINK](https://steeditor.app/drafts/)`;
@@ -226,11 +227,13 @@ fdescribe('#core #steemize (json_metadata) getLinks', () => {
 
     expect(links).toEqual([
       'https://steeditor.app/',
+      'https://utopian.io',
+      'https://steeditor.app/test/',
       'https://steeditor.app/drafts/'
     ]);
   });
 
-  it('should ignore markdown images (`![]()` syntax)', () => {
+  it('should ignore images', () => {
     const body = `![Markdown is the best!](https://steeditor.app/images/amazing_image)
     <p>test</p>
     [NICELINK](https://steeditor.app/)
@@ -242,9 +245,8 @@ fdescribe('#core #steemize (json_metadata) getLinks', () => {
     expect(links).not.toContain('https://steeditor.app/images/amazing_image');
   });
 
-  it('should return an empty array if there are no links or they are not formatted with Markdown', () => {
-    const body = `<a href="https://steeditor.app/">HTML is bad, remember</a>
-    https://steeditor.app/link/ this weird mixin is still considered a Markdown tho :(`;
+  it('should return an empty array if there arent any links', () => {
+    const body = `HTML is bad, remember blabla :(`;
 
     const links = getLinks(body);
 

diff --git a/src/core/steemize/withJsonMetadata.ts b/src/core/steemize/withJsonMetadata.ts
@@ -2,22 +2,24 @@ import {
   always,
   assoc,
   compose,
+  contains,
   either,
-  flatten,
   identity,
   ifElse,
+  init,
   isEmpty,
   isNil,
+  last,
   map,
   match,
   mergeDeepLeft,
   o,
   pipe,
   prepend,
   reject,
-  startsWith,
   test,
-  uniq
+  uniq,
+  __
 } from 'ramda';
 import * as appInfo from '../../../package.json';
 import { SteeditorPost } from '../SteeditorPost';
@@ -118,17 +120,22 @@ export const withImage = (body: string, thumbnail?: string) => (target: {
 
 /**
  * Extracts links from given string.
- * Note that it will only return links which are correctly formatted with Markdown.
  * @param text A text to extract links from.
  * @returns An array of unique links.
  */
-export const getLinks: (text: string) => Array<string> = pipe(
-  match(/.?(?:\[(.*?)\]\((.*?)\))/g),
-  reject(startsWith('!')),
-  map(getMatchesByGroup(/\((.*?)\)/g, 1)),
-  flatten,
-  uniq
-);
+export const getLinks = (text: string): Array<string> => {
+  const linksToImages = getImages(text); // presumably the image URLs found in `text` (getImages is defined outside this hunk; verify)
+  return pipe(
+    match(
+      // tslint:disable-next-line:max-line-length
+      /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9]\.[^\s]{2,})/g
+    ),
+    // Every alternative of the pattern ends in [^\s]{2,}, so a match runs up to the next whitespace; a Markdown link [text](url) is therefore captured together with its closing ')'. Strip that single trailing ')' (any other trailing punctuation is left untouched).
+    map(link => (last(link) === ')' ? init(link) : link)),
+    reject(contains(__, linksToImages)), // drop every link that is also present in linksToImages
+    uniq // remove duplicate links
+  )(text);
+};
 
 /**
  * Adds `links` property to provided `target` object, based on provided `body` string.