Jelajahi Sumber

Use link-preview package

sbkwgh 7 tahun lalu
induk
melakukan
62c7e8a7d8

+ 0 - 42
lib/linkPreview/getOGPreviewData.js

@@ -1,42 +0,0 @@
-let cheerio = require('cheerio');
-let axios = require('axios');
-
-module.exports = async function getOGPreviewData (url) {
-	try {
-		let res = await axios.get(url);
-		let $ = cheerio.load(res.data);
-
-		let OG = {
-			title: $('meta[property="og:title"]'),
-			url: $('meta[property="og:url"]'),
-			image: $('meta[property="og:image"]'),
-			description: $('meta[property="og:description"]')
-		};
-
-		let alternative = {
-			title: $('title'),
-			description: $('meta[name="description"]')
-		};
-
-		let data = {};
-
-		if(OG.title.length && OG.url.length) {
-			data.title = OG.title.attr('content'); 
-			data.url = OG.url.attr('content');
-
-			if(OG.image) data.image = OG.image.attr('content');
-			if(OG.description) data.description = OG.description.attr('content');
-
-			return data;
-		} else if(alternative.title.length && alternative.description.length) {
-			data.title = alternative.title.text();
-			data.description = alternative.description.attr('content');
-		} else {
-			return null;
-		}
-
-		return data;
-	} catch (e) {
-		return null;
-	}
-}

+ 0 - 35
lib/linkPreview/getPreviewHTML.js

@@ -1,35 +0,0 @@
-let url = require('url');
-let ejs = require('ejs');
-
-module.exports = function getPreviewHTML (data) {
-	let template = `
-		<div class='link_preview'>
-			<h1>
-				<a href='<%= url %>' target='_blank' rel='noopener noreferrer'>
-					<%= title %>
-				</a>
-			</h1>
-			<h2>
-				from <%= hostname %>
-			</h2>
-			<% if(locals.partial) { %>
-				<div class='link_preview__partial'>
-					<%- partial %>
-				</div>
-			<% } %>
-			<% if(locals.image || locals.description) { %>
-				<p>
-					<% if(locals.image) { %>
-						<img src='<%= image %>'>
-					<% } %>
-					<% if(locals.description) { %>
-						<%= description %>
-					<% } %>
-				</p>
-			<% } %>
-		</div>
-	`;
-
-	data.hostname = url.parse(data.url).hostname;
-	return ejs.render(template, data);
-}

+ 0 - 37
lib/linkPreview/index.js

@@ -1,37 +0,0 @@
-let fs = require('fs');
-let path = require('path')
-
-let getOGPreviewData = require('./getOGPreviewData');
-let getPreviewHTML = require('./getPreviewHTML');
-//Require every file in ./patterns; linkPreview calls matches(url) and getPreviewData(url) on each
-let previewPatterns = 
-	fs.readdirSync(path.join(__dirname, 'patterns'))
-	  .map(file => {
-	      return require(path.join(__dirname, 'patterns', file));
-	  });
-
-module.exports =  async function linkPreview(url) {
-	let previewData;
-
-	for(let pattern of previewPatterns) {
-		if(pattern.matches(url)) {
-			previewData = await pattern.getPreviewData(url);
-			break;
-		}
-	}
-
-	//If the url doesn't match a pattern for a specific
-	//site, try getting a possible preview using OG tags
-	if(!previewData) previewData = await getOGPreviewData(url);
-
-	//If there is some data scraped from the site for a
-	//preview, generate a HTML string
-	//Otherwise return an empty string
-	if(typeof previewData === 'object' && previewData !== null) {
-		return getPreviewHTML(previewData);
-	} else if(typeof previewData === 'string') {
-		return previewData;
-	} else {
-		return '';
-	}
-}

+ 0 - 49
lib/linkPreview/patterns/amazon.js

@@ -1,49 +0,0 @@
-let ejs = require('ejs');
-let cheerio = require('cheerio');
-let axios = require('axios');
-
-module.exports = {
-	matches (url) {
-		let amazonRegExp = /^(https?:\/\/(www\.)?(smile.)?(amazon|amzn)\.(com|com\.au|com\.br|ca|cn|fr|de|in|it|co\.jp|com\.mx|nl|es|co\.uk)\/(gp\/product|[A-Z0-9-]+\/dp)\/[A-Z0-9]+)/i;
-		return url.match(amazonRegExp);
-	},
-	async getPreviewData (link_url) {
-		try {
-			let res = await axios.get(link_url);
-			let $ = cheerio.load(res.data);
-
-			data = {
-				title: $('#productTitle').text().trim(),
-				description: $('meta[name="description"]').attr('content').trim(),
-				url: this.matches(link_url)[0]
-			}
-			
-			let image = $('#landingImage').data('old-hires');
-			if(image) data.image = image;
-
-			let price = $('#priceblock_ourprice').text();
-			let stars = $('.a-icon.a-icon-star .a-icon-alt').first().text();
-			if(stars ||price) {
-				let reviewUrl = data.url + '#customerReviews';
-				let partialTemplate = `
-					<% if (stars) { %>
-						<a href='<%= reviewUrl %>' target='_blank' rel='noopener noreferer'>
-							<%= stars%>
-						</a>
-						&nbsp; | &nbsp;
-					<% } %>
-					<% if (price) { %>
-						<%= price %>
-					<% } %>
-				`;
-
-				data.partial = ejs.render(partialTemplate, { reviewUrl, stars, price })
-			}
-
-			return data;
-		} catch (e) {
-			console.log(e)
-			return null;
-		}
-	}
-};

+ 0 - 35
lib/linkPreview/patterns/github.js

@@ -1,35 +0,0 @@
-let url = require('url');
-let ejs = require('ejs');
-let axios = require('axios');
-
-module.exports = {
-	matches (url) {
-		return url.match(/^https?:\/\/(www\.)?github\.com\/.+\/.+/i);
-	},
-	async getPreviewData (link_url) {
-		try {
-			let pathname = url.parse(link_url).pathname;
-			let res = await axios.get('https://api.github.com/repos' + pathname);
-
-			let partialTemplate = `
-				<a href='<%= html_url + '/stargazers' %>' target='_blank' rel='noopener noreferer'>
-					<%= stargazers_count %> stargazer<%= stargazers_count === 1 ? '' : 's' %>
-				</a>
-				and
-				<a href='<%= html_url + '/network' %>' target='_blank' rel='noopener noreferer'>
-					<%= forks_count %> fork<%= forks_count === 1 ? '' : 's' %>
-				</a>
-			`;
-
-			return {
-				title: res.data.full_name,
-				url: res.data.html_url,
-				description: res.data.description,
-				partial: ejs.render(partialTemplate, res.data),
-				image: res.data.owner.avatar_url
-			};
-		} catch (e) {
-			return null;
-		}
-	}
-};

+ 0 - 15
lib/linkPreview/patterns/twitter.js

@@ -1,15 +0,0 @@
-let axios = require('axios');
-
-module.exports = {
-	matches (url) {
-		return url.match(/^https?:\/\/(www\.)?twitter\.com\/.+\/status\/\d+/i);
-	},
-	async getPreviewData (url) {
-		try {
-			let res = await axios.get('https://publish.twitter.com/oembed?url=' + url);
-			return res.data.html;
-		} catch (e) {
-			return null;
-		}
-	}
-};

+ 0 - 26
lib/linkPreview/patterns/wikipedia.js

@@ -1,26 +0,0 @@
-let url = require('url');
-let axios = require('axios');
-
-module.exports = {
-	matches (url) {
-		return url.match(/^https?:\/\/[a-z]+\.wikipedia\.org\/wiki\/.+/i);
-	},
-	async getPreviewData (link_url) {
-		try {
-			let parsedUrl = url.parse(link_url);
-			let page = parsedUrl.pathname.split('/').slice(-1)[0];
-			let countryVersion = parsedUrl.hostname.split('.')[0];
-						
-			let res = await axios.get(`https://${countryVersion}.wikipedia.org/api/rest_v1/page/summary/${page}?redirect=true`);
-			let content = res.data.extract.slice(0, 500).trim();
-
-			return {
-				title: res.data.titles.display,
-				url: res.data.content_urls.desktop.page,
-				description: content.length < 500 ? content : content + '...'
-			}
-		} catch (e) {
-			return null;
-		}
-	}
-};

+ 10 - 0
package-lock.json

@@ -3102,6 +3102,16 @@
       "resolved": "https://registry.npmjs.org/pretty-hrtime/-/pretty-hrtime-1.0.3.tgz",
       "integrity": "sha1-t+PqQkNaTJsnWdmeDyAesZWALuE="
     },
+    "preview-link": {
+      "version": "0.0.0",
+      "resolved": "https://registry.npmjs.org/preview-link/-/preview-link-0.0.0.tgz",
+      "integrity": "sha512-vYhlxNkcnfdmeoqdcOtjYY8RbbPb1VdiHXwjeOmBw8uTJ2BhNnt35nPDnLoS0oc7/UTdf0m3ueKunmwF7fpvWg==",
+      "requires": {
+        "axios": "0.18.0",
+        "cheerio": "1.0.0-rc.2",
+        "ejs": "2.5.7"
+      }
+    },
     "process-nextick-args": {
       "version": "1.0.7",
       "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-1.0.7.tgz",

+ 1 - 0
package.json

@@ -31,6 +31,7 @@
     "multer": "^1.3.0",
     "mysql": "^2.13.0",
     "mysql2": "^1.4.2",
+    "preview-link": "0.0.0",
     "randomcolor": "^0.4.4",
     "sequelize": "^3.30.0",
     "sequelize-cli": "^2.8.0",

+ 1 - 3
routes/link_preview.js

@@ -1,9 +1,7 @@
-let linkPreview = require('../lib/linkPreview');
+let linkPreview = require('preview-link');
 let express = require('express');
 let router = express.Router();
 
-const Errors = require('../lib/errors.js');
-
 router.get('/', async (req, res, next) => {
 	try {
 		let url = req.query.url;

+ 0 - 191
test/link_preview.js

@@ -1,191 +0,0 @@
-process.env.NODE_ENV = 'test';
-
-let chai = require('chai');
-let server = require('../server');
-let should = chai.should();
-let expect = chai.expect;
-
-let getOGPreviewData = require('../lib/linkPreview/getOGPreviewData');
-let getPreviewHTML = require('../lib/linkPreview/getPreviewHTML');
-let linkPreview = require('../lib/linkPreview');
-
-let github = require('../lib/linkPreview/patterns/github');
-let wikipedia = require('../lib/linkPreview/patterns/wikipedia');
-let twitter = require('../lib/linkPreview/patterns/twitter');
-let amazon = require('../lib/linkPreview/patterns/amazon');
-
-const Errors = require('../lib/errors.js');
-
-chai.use(require('chai-http'));
-chai.use(require('chai-things'));
-
-
-describe('link_expansion', () => {
-	//Wait for app to start before commencing
-	before((done) => {
-		if(server.locals.appStarted) done();
-		server.on('appStarted', done);
-	});
-
-	describe('getOGPreviewData', () => { //Each case requests a live page, so the expected strings depend on what that site serves
-		it('should return an object containing relevant OG data', async () => {
-			let data = await getOGPreviewData('https://www.theguardian.com/news/2018/mar/17/cambridge-analytica-facebook-influence-us-election')
-
-			data.should.have.property(
-				'title',
-				'Revealed: 50 million Facebook profiles harvested for Cambridge Analytica in major data breach'
-			);
-			data.should.have.property(
-				'description',
-				'Whistleblower describes how firm linked to former Trump adviser Steve Bannon compiled user data to target American voters• How Cambridge Analytica’s algorithms turned ‘likes’ into a political tool'
-			);
-			data.should.have.property(
-				'url',
-				'http://www.theguardian.com/news/2018/mar/17/cambridge-analytica-facebook-influence-us-election'
-			);
-			data.should.have.property(
-				'image',
-				'https://i.guim.co.uk/img/media/97532076a6935a1e79eba294437ed91f3eb4df6b/0_626_4480_2688/master/4480.jpg?w=1200&h=630&q=55&auto=format&usm=12&fit=crop&crop=faces%2Centropy&bm=normal&ba=bottom%2Cleft&blend64=aHR0cHM6Ly91cGxvYWRzLmd1aW0uY28udWsvMjAxOC8wMS8zMS9mYWNlYm9va19kZWZhdWx0LnBuZw&s=365825fe053733ae12f9b050f5374594'
-			);
-		});
-		it('should use other meta or title tags if there is no OG tags availible', async () => { //Exercises the <title> / meta description fallback
-			let data = await getOGPreviewData('http://ejs.co');
-			data.should.have.property('title', 'EJS -- Embedded JavaScript templates');
-			data.should.have.property(
-				'description',
-				"'E' is for 'effective'. EJS is a simple templating language that lets you generate HTML markup with plain JavaScript. No religiousness about how to organize things. No reinvention of iteration and control-flow. It's just plain JavaScript."
-			);
-		});
-		it('should return null if there is no OG tags availible', async () => { //Neither OG tags nor the fallback tags are expected on this page
-			let data = await getOGPreviewData('http://blank.org');
-			expect(data).to.be.null;
-		});
-	});
-
-	describe('getPreviewHTML', () => {
-		it('should return an HTML string for given object', () => {
-			let HTML = getPreviewHTML({
-				url: 'http://www.example.com',
-				description: 'description',
-				title: 'title',
-				image: 'image'
-			});
-			//Only the result type is asserted; the markup itself is not inspected
-			(typeof HTML).should.equal('string');
-		})
-		it('should correctly deal with the conditional', () => { //Same input without image, so the image branch of the template is skipped
-			let HTML = getPreviewHTML({
-				url: 'http://www.example.com',
-				description: 'description',
-				title: 'title'
-			});
-			(typeof HTML).should.equal('string');
-		})
-	});
-
-	describe('linkPreview', () => {
-		it('should get a HTML string from an OG link', async () => {
-			let HTML = await linkPreview('https://www.theguardian.com/news/2018/mar/17/cambridge-analytica-facebook-influence-us-election');
-			//This url matches none of the site patterns tested below, so the OG fallback is used
-			(typeof HTML).should.equal('string');
-			HTML.length.should.be.above(0);
-		});
-
-		it('should get a HTML string from a custom pattern', async () => {
-			let HTML = await linkPreview('https://en.wikipedia.org/wiki/google');
-			//Handled by the wikipedia pattern
-			(typeof HTML).should.equal('string');
-			HTML.length.should.be.above(0);
-		});
-
-		it('should return an empty string from an invalid site', async () => {
-			let HTML = await linkPreview('http://blank.org');
-			//Only the type is asserted here, not that the string is empty
-			(typeof HTML).should.equal('string');
-		});
-	});
-
-	describe('GitHub', () => {
-		it('should match a valid GitHub url', () => {
-			github.matches('https://github.com/sbkwgh/forum').should.not.be.null;
-			github.matches('http://github.com/sbkwgh/forum').should.not.be.null;
-			//The pattern is anchored at the start of the url, so another host ending in github.com fails
-			expect(github.matches('http://notgithub.com/sbkwgh/forum')).to.be.null;
-		});
-		it('should return a data object', async () => { //Performs a live request to the GitHub API
-			let data = await github.getPreviewData('https://github.com/sbkwgh/forum');
-
-			data.should.have.property('title', 'sbkwgh/forum')
-			data.should.have.property('url', 'https://github.com/sbkwgh/forum')
-			data.should.have.property('description', 'Forum software created using Express, Vue, and Sequelize')
-		});
-	});
-
-	describe('Wikipedia', () => {
-		it('should match a valid Wikipedia url', () => {
-			wikipedia.matches('https://en.wikipedia.org/wiki/google').should.not.be.null;
-			wikipedia.matches('http://fr.wikipedia.org/wiki/google').should.not.be.null;
-			//The path has to start with /wiki/ for the pattern to match
-			expect(wikipedia.matches('http://en.wikipedia.org/notapage')).to.be.null;
-		});
-		it('should return a data object', async () => { //Performs a live request to the Wikipedia REST API
-			let data = await wikipedia.getPreviewData('https://en.wikipedia.org/wiki/google');
-
-			data.should.have.property('title', 'Google')
-			data.should.have.property('url', 'https://en.wikipedia.org/wiki/Google')
-			data.description.should.have.length(503) //500 characters of extract plus the '...' suffix
-		});
-	});
-
-	describe('Twitter', () => {
-		it('should match a valid Wikipedia url', () => {
-			twitter.matches('https://twitter.com/user/status/12345').should.not.be.null;
-			
-			expect(twitter.matches('http://twitter.com/notapage/123456')).to.be.null;
-			expect(twitter.matches('http://twitter.com/notapage/status/qwertyu')).to.be.null;
-		});
-		it('should return a data object', async () => {
-			let HTML = await twitter.getPreviewData('https://twitter.com/Interior/status/463440424141459456');
-
-			(typeof HTML).should.equal('string');
-			HTML.should.have.length.above(0);
-		});
-	});
-
-	describe('Amazon', () => {
-		it('should match a valid Amazon url', () => {
-			amazon.matches('https://www.amazon.co.uk/gp/product/0199858616').should.not.be.null;
-			amazon.matches('https://smile.amazon.co.uk/gp/product/0199858616').should.not.be.null;
-			amazon.matches('https://www.amazon.co.uk/Betron-Isolating-Earphones-Headphones-Microphone-Black/dp/B01N1X4910').should.not.be.null;
-			amazon.matches('http://amazon.co.uk/Sony-5-5-Inch-Android-SIM-Free-Smartphone-Gold/dp/B0792GT5T4/ref=dfg').should.not.be.null;
-			//Only /gp/product/<id> and /<name>/dp/<id> paths count as product pages
-			expect(amazon.matches('https://www.amazon.co.uk/gp/dmusic/promotions/AmazonMusicUnlimited')).to.be.null;
-			expect(amazon.matches('https://www.amazon.co.uk/')).to.be.null;
-		});
-
-		it('should return a correct data object', async () => { //Scrapes the live product page, so these values depend on the current listing
-			let data = await amazon.getPreviewData(
-				`https://www.amazon.co.uk/gp/product/B005G39HUK/ref=s9u_ri_gw_i2?ie=UTF8&fpl=fresh&pd_rd_i=B005G39HUK&pd_rd_r=4edec2c7-2abc-11e8-9a21-019e4b2648c4&pd_rd_w=jtpWg&pd_rd_wg=lyBTu&pf_rd_m=A3P5ROKL5A1OLE&pf_rd_s=&pf_rd_r=8G2NPHM6AE411J2M0V6Z&pf_rd_t=36701&pf_rd_p=81d63d24-31ce-4958-9c19-bb66b139bc25&pf_rd_i=desktop`
-			);
-
-			data.should.have.property(
-				'description',
-				"Fruit of the Loom Men's Super Premium Short Sleeve T-Shirt: Free UK Shipping on Orders Over £10 and Free 30-Day Returns on Selected Fashion Items sold or fulfilled by Amazon."
-			);
-			data.should.have.property(
-				'url', //The matched prefix of the link: tracking path and query are dropped
-				'https://www.amazon.co.uk/gp/product/B005G39HUK'
-			);
-			data.should.have.property(
-				'title',
-				"Fruit of the Loom Men's Super Premium Short Sleeve T-Shirt"
-			);
-			data.should.have.property(
-				'image',
-				'https://images-na.ssl-images-amazon.com/images/I/91q6n9sLPsL._UL1500_.jpg'
-			);
-			data.partial.includes('4.4 out of 5 stars').should.be.true;
-			data.partial.includes('£1.20 - £19.99').should.be.true;
-		});
-	});
-})