forked from fivefilters/ftr-site-config
-
Notifications
You must be signed in to change notification settings - Fork 0
/
.medium.com.txt
36 lines (29 loc) · 1.46 KB
/
.medium.com.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Site config for medium.com.
# Keep medium.com.txt and .medium.com.txt in sync: copy any change made to
# one file into the other.
# Send a search-engine crawler user agent (Yahoo! Slurp) with the request.
http_header(user-agent): Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)
# Title: the Open Graph title is listed first, with the page's <h1> as an
# alternative.
title: //meta[@property="og:title"]/@content
title: //h1
# Body: the <article> element.
body: //article
# Author and publication date are read from <meta> tags.
author: //meta[@name="author"]/@content
date: //meta[@property="article:published_time"]/@content
# Remove buttons and the post byline header.
strip: //button
strip: //header[contains(@class, 'pw-post-byline-header')]
# Remove low-quality images (src contains '?q=20' or 'max/34/').
strip: //article//img[contains(@src, '?q=20') or contains(@src, 'max/34/')]
# Remove empty images (<img> elements with no src attribute).
strip: //article//img[not(@src)]
# Use the high-quality copies in <noscript> elements: rewrite each
# <noscript>...</noscript> as <div class="ftr-noscript">...</div> so that its
# contents are kept as ordinary markup.
replace_string(<noscript>): <div class="ftr-noscript">
replace_string(</noscript>): </div>
# Strip only the first <h1> (presumably the title, already captured above).
# Further <h1> elements can appear later in an article and must be kept,
# e.g. https://medium.com/the-shape-of-things-to-come/facilitating-communities-of-practice-from-individual-to-ecosystem-learning-outcomes-bf7b54660b08
strip: (//h1)[1]
# Remove inline SVG elements.
strip: //svg
# Remove duplicate images of the form <img><noscript><img></noscript>.
# The image strip rules above normally remove the first <img>, leaving only
# the <noscript> copy. If browser-rendered HTML is submitted, those rules
# won't match (the first <img> has a real src), so instead drop the converted
# <noscript> block that immediately follows an <img> with a src.
strip: //article//img[@src]/following-sibling::*[1][self::div and @class='ftr-noscript']
# Less accurate alternative, kept for reference: it matches any later sibling
# div with that class, not only the element immediately after the image.
# strip: //article//img[@src]/following-sibling::div[@class="ftr-noscript"]
# Turn off the tidy and prune steps for this site.
tidy: no
prune: no
# Test case: a URL to fetch and a phrase the extracted content should contain.
test_url: https://elemental.medium.com/the-dark-side-of-fitness-tracking-9b218989bc47
test_contains: Apps have turned movement and mindfulness