Skip to content

Commit

Permalink
Various improvements to favicons (#504)
Browse files Browse the repository at this point in the history
* Update default favicon provider

* Add domain placeholder for favicon providers

* Fix favicon loader to handle streaming response

* Handle different mime types for favicons

* Use 32px size by default

* Update documentation

* Skip mime-type test for now

* Manually configure image/x-icon mime type
  • Loading branch information
sissbruecker committed Aug 15, 2023
1 parent ea240ee commit 5d9e487
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 38 deletions.
70 changes: 48 additions & 22 deletions bookmarks/services/favicon_loader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import mimetypes
import os.path
import re
import shutil
import time
from pathlib import Path
from urllib.parse import urlparse
Expand All @@ -10,48 +11,73 @@

max_file_age = 60 * 60 * 24 # 1 day

logger = logging.getLogger(__name__)

# register mime type for .ico files, which is not included in the default
# mimetypes of the Docker image
mimetypes.add_type('image/x-icon', '.ico')


def _ensure_favicon_folder():
    """Create the configured favicon folder, including missing parents.

    Does nothing if the folder already exists.
    """
    favicon_folder = Path(settings.LD_FAVICON_FOLDER)
    favicon_folder.mkdir(parents=True, exist_ok=True)


def _url_to_filename(url: str) -> str:
name = re.sub(r'\W+', '_', url)
return f'{name}.png'
return re.sub(r'\W+', '_', url)


def _get_base_url(url: str) -> str:
def _get_url_parameters(url: str) -> dict:
parsed_uri = urlparse(url)
return f'{parsed_uri.scheme}://{parsed_uri.hostname}'
return {
# https://example.com/foo?bar -> https://example.com
'url': f'{parsed_uri.scheme}://{parsed_uri.hostname}',
# https://example.com/foo?bar -> example.com
'domain': parsed_uri.hostname,
}


def _get_favicon_path(favicon_file: str) -> Path:
    """Return the path of ``favicon_file`` inside the configured favicon folder."""
    joined = os.path.join(settings.LD_FAVICON_FOLDER, favicon_file)
    return Path(joined)


def _check_existing_favicon(favicon_name: str):
    """Look up a cached favicon by base name, ignoring the file extension.

    Args:
        favicon_name: File name without extension, as produced for the
            bookmark's scheme and hostname.

    Returns:
        The file name (with extension) of a matching favicon that is not
        stale, or ``None`` if no such file exists in the favicon folder.
    """
    for filename in os.listdir(settings.LD_FAVICON_FOLDER):
        file_base_name, _ = os.path.splitext(filename)
        if file_base_name != favicon_name:
            continue
        # Several files can share a base name (e.g. an old .png next to a
        # newer .ico, since the extension follows the response mime type).
        # os.listdir order is arbitrary, so keep scanning past stale matches
        # instead of giving up on the first one.
        if not _is_stale(_get_favicon_path(filename)):
            return filename
    return None


def _is_stale(path: Path) -> bool:
    """Tell whether the file at ``path`` is at least ``max_file_age`` seconds old.

    Age is measured from the file's last modification time.
    """
    modified_at = path.stat().st_mtime
    return time.time() - modified_at >= max_file_age


def load_favicon(url: str) -> str:
# Get base URL so that we can reuse favicons for multiple bookmarks with the same host
base_url = _get_base_url(url)
favicon_name = _url_to_filename(base_url)
favicon_path = _get_favicon_path(favicon_name)

# Load icon if it doesn't exist yet or has become stale
if not favicon_path.exists() or _is_stale(favicon_path):
# Create favicon folder if not exists
_ensure_favicon_folder()
# Load favicon from provider, save to file
favicon_url = settings.LD_FAVICON_PROVIDER.format(url=base_url)
response = requests.get(favicon_url, stream=True)

with open(favicon_path, 'wb') as file:
shutil.copyfileobj(response.raw, file)
url_parameters = _get_url_parameters(url)

del response
# Create favicon folder if not exists
_ensure_favicon_folder()
# Use scheme+hostname as favicon filename to reuse icon for all pages on the same domain
favicon_name = _url_to_filename(url_parameters['url'])
favicon_file = _check_existing_favicon(favicon_name)

return favicon_name
if not favicon_file:
# Load favicon from provider, save to file
favicon_url = settings.LD_FAVICON_PROVIDER.format(**url_parameters)
logger.debug(f'Loading favicon from: {favicon_url}')
with requests.get(favicon_url, stream=True) as response:
content_type = response.headers['Content-Type']
file_extension = mimetypes.guess_extension(content_type)
favicon_file = f'{favicon_name}{file_extension}'
favicon_path = _get_favicon_path(favicon_file)
with open(favicon_path, 'wb') as file:
for chunk in response.iter_content(chunk_size=8192):
file.write(chunk)
logger.debug(f'Saved favicon as: {favicon_path}')

return favicon_file
8 changes: 4 additions & 4 deletions bookmarks/services/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,12 @@ def _load_favicon_task(bookmark_id: int):

logger.info(f'Load favicon for bookmark. url={bookmark.url}')

new_favicon = favicon_loader.load_favicon(bookmark.url)
new_favicon_file = favicon_loader.load_favicon(bookmark.url)

if new_favicon != bookmark.favicon_file:
bookmark.favicon_file = new_favicon
if new_favicon_file != bookmark.favicon_file:
bookmark.favicon_file = new_favicon_file
bookmark.save(update_fields=['favicon_file'])
logger.info(f'Successfully updated favicon for bookmark. url={bookmark.url} icon={new_favicon}')
logger.info(f'Successfully updated favicon for bookmark. url={bookmark.url} icon={new_favicon_file}')


def schedule_bookmarks_without_favicons(user: User):
Expand Down
4 changes: 2 additions & 2 deletions bookmarks/templates/settings/general.html
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ <h2>Profile</h2>
Automatically loads favicons for bookmarked websites and displays them next to each bookmark.
By default, this feature uses a <b>Google service</b> to download favicons.
If you don't want to use this service, check the <a
href="https://github.com/sissbruecker/linkding/blob/master/docs/Options.md" target="_blank">options
documentation</a> on how to configure a custom favicon provider.
href="https://github.com/sissbruecker/linkding/blob/master/docs/Options.md#ld_favicon_provider"
target="_blank">options documentation</a> on how to configure a custom favicon provider.
Icons are downloaded in the background, and it may take a while for them to show up.
</div>
{% if request.user_profile.enable_favicons and enable_refresh_favicons %}
Expand Down
61 changes: 55 additions & 6 deletions bookmarks/tests/test_favicon_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,40 @@
import os.path
import time
from pathlib import Path
from unittest import mock
from unittest import mock, skip

from django.conf import settings
from django.test import TestCase
from django.test import TestCase, override_settings

from bookmarks.services import favicon_loader

mock_icon_data = b'mock_icon'


class MockStreamingResponse:
    """Minimal stand-in for a streamed HTTP response in favicon loader tests.

    Exposes a ``headers`` mapping with the content type, an ``iter_content``
    method yielding the icon data as a single chunk, and the context-manager
    protocol so it can be used in a ``with`` statement.
    """

    def __init__(self, data=mock_icon_data, content_type='image/png'):
        self.headers = {'Content-Type': content_type}
        self.chunks = [data]

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Nothing to release; returning None lets exceptions propagate.
        return None

    def iter_content(self, **kwargs):
        # Keyword arguments (such as chunk_size) are accepted and ignored.
        return self.chunks


class FaviconLoaderTestCase(TestCase):
def setUp(self) -> None:
self.ensure_favicon_folder()
self.clear_favicon_folder()

def create_mock_response(self, icon_data=mock_icon_data):
def create_mock_response(self, icon_data=mock_icon_data, content_type='image/png'):
mock_response = mock.Mock()
mock_response.raw = io.BytesIO(icon_data)
return mock_response
return MockStreamingResponse(icon_data, content_type)

def ensure_favicon_folder(self):
Path(settings.LD_FAVICON_FOLDER).mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -93,12 +108,14 @@ def test_load_favicon_caches_icons(self):
with mock.patch('requests.get') as mock_get:
mock_get.return_value = self.create_mock_response()

favicon_loader.load_favicon('https://example.com')
favicon_file = favicon_loader.load_favicon('https://example.com')
mock_get.assert_called()
self.assertEqual(favicon_file, 'https_example_com.png')

mock_get.reset_mock()
favicon_loader.load_favicon('https://example.com')
updated_favicon_file = favicon_loader.load_favicon('https://example.com')
mock_get.assert_not_called()
self.assertEqual(favicon_file, updated_favicon_file)

def test_load_favicon_updates_stale_icon(self):
with mock.patch('requests.get') as mock_get:
Expand All @@ -125,3 +142,35 @@ def test_load_favicon_updates_stale_icon(self):
favicon_loader.load_favicon('https://example.com')
mock_get.assert_called()
self.assertEqual(updated_mock_icon_data, self.get_icon_data('https_example_com.png'))

@override_settings(LD_FAVICON_PROVIDER='https://custom.icons.com/?url={url}')
def test_custom_provider_with_url_param(self):
with mock.patch('requests.get') as mock_get:
mock_get.return_value = self.create_mock_response()

favicon_loader.load_favicon('https://example.com/foo?bar=baz')
mock_get.assert_called_with('https://custom.icons.com/?url=https://example.com', stream=True)

@override_settings(LD_FAVICON_PROVIDER='https://custom.icons.com/?url={domain}')
def test_custom_provider_with_domain_param(self):
with mock.patch('requests.get') as mock_get:
mock_get.return_value = self.create_mock_response()

favicon_loader.load_favicon('https://example.com/foo?bar=baz')
mock_get.assert_called_with('https://custom.icons.com/?url=example.com', stream=True)

def test_guess_file_extension(self):
with mock.patch('requests.get') as mock_get:
mock_get.return_value = self.create_mock_response(content_type='image/png')
favicon_loader.load_favicon('https://example.com')

self.assertTrue(self.icon_exists('https_example_com.png'))

self.clear_favicon_folder()
self.ensure_favicon_folder()

with mock.patch('requests.get') as mock_get:
mock_get.return_value = self.create_mock_response(content_type='image/x-icon')
favicon_loader.load_favicon('https://example.com')

self.assertTrue(self.icon_exists('https_example_com.ico'))
14 changes: 11 additions & 3 deletions docs/Options.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,20 @@ A json string with additional options for the database. Passed directly to OPTIO

### `LD_FAVICON_PROVIDER`

Values: `String` | Default = `https://t1.gstatic.com/faviconV2?url={url}&client=SOCIAL&type=FAVICON`
Values: `String` | Default = `https://t1.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url={url}&size=32`

The favicon provider used for downloading icons if they are enabled in the user profile settings.
The default provider is a Google service that automatically detects the correct favicon for a website, and provides icons in a consistent image format (PNG) and a consistent image size.

This setting allows you to configure a custom provider in the form of a URL.
When calling the provider with the URL of a website, it must return the image data for the favicon of that website.
The configured favicon provider URL must contain a `{url}` placeholder that will be replaced with the URL of the website for which to download the favicon.
See the default URL for an example.
The configured favicon provider URL must contain a placeholder that will be replaced with the URL of the website for which to download the favicon.
The available placeholders are:
- `{url}` - Includes the scheme and hostname of the website, for example `https://example.com`
- `{domain}` - Includes only the hostname of the website, for example `example.com`

Which placeholder you need to use depends on the respective favicon provider; please check its documentation or usage examples.
See the default URL for how to insert the placeholder into the favicon provider URL.

Alternative favicon providers:
- DuckDuckGo: `https://icons.duckduckgo.com/ip3/{domain}.ico`
2 changes: 1 addition & 1 deletion siteroot/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@
}

# Favicons
LD_DEFAULT_FAVICON_PROVIDER = 'https://t1.gstatic.com/faviconV2?url={url}&client=SOCIAL&type=FAVICON'
LD_DEFAULT_FAVICON_PROVIDER = 'https://t1.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url={url}&size=32'
LD_FAVICON_PROVIDER = os.getenv('LD_FAVICON_PROVIDER', LD_DEFAULT_FAVICON_PROVIDER)
LD_FAVICON_FOLDER = os.path.join(BASE_DIR, 'data', 'favicons')
LD_ENABLE_REFRESH_FAVICONS = os.getenv('LD_ENABLE_REFRESH_FAVICONS', True) in (True, 'True', '1')

0 comments on commit 5d9e487

Please sign in to comment.