From f5d4f97c028ce58a2b5c0c71d585028d9bf5ffa6 Mon Sep 17 00:00:00 2001 From: otsmr Date: Wed, 21 Jan 2026 23:14:23 +0100 Subject: [PATCH] improving link parser --- .../layers/link_preview/parse_link.dart | 122 ++++++------------ .../layers/link_preview/parser/base.dart | 22 ++-- .../{html_parser.dart => html.parser.dart} | 3 - .../link_preview/parser/json_ld.parser.dart | 98 ++++++++++++++ .../link_preview/parser/json_ld_parser.dart | 80 ------------ .../link_preview/parser/mastodon.parser.dart | 15 +++ .../parser/{og_parser.dart => og.parser.dart} | 7 - .../{other_parser.dart => other.parser.dart} | 12 -- ...witter_parser.dart => twitter.parser.dart} | 29 ++--- ...outube_parser.dart => youtube.parser.dart} | 44 +++---- test/features/link_parser_test.dart | 90 +++++++++++-- 11 files changed, 276 insertions(+), 246 deletions(-) rename lib/src/views/camera/share_image_editor/layers/link_preview/parser/{html_parser.dart => html.parser.dart} (95%) create mode 100644 lib/src/views/camera/share_image_editor/layers/link_preview/parser/json_ld.parser.dart delete mode 100644 lib/src/views/camera/share_image_editor/layers/link_preview/parser/json_ld_parser.dart create mode 100644 lib/src/views/camera/share_image_editor/layers/link_preview/parser/mastodon.parser.dart rename lib/src/views/camera/share_image_editor/layers/link_preview/parser/{og_parser.dart => og.parser.dart} (82%) rename lib/src/views/camera/share_image_editor/layers/link_preview/parser/{other_parser.dart => other.parser.dart} (62%) rename lib/src/views/camera/share_image_editor/layers/link_preview/parser/{twitter_parser.dart => twitter.parser.dart} (52%) rename lib/src/views/camera/share_image_editor/layers/link_preview/parser/{youtube_parser.dart => youtube.parser.dart} (61%) diff --git a/lib/src/views/camera/share_image_editor/layers/link_preview/parse_link.dart b/lib/src/views/camera/share_image_editor/layers/link_preview/parse_link.dart index aa75259..0923f5c 100644 --- a/lib/src/views/camera/share_image_editor/layers/link_preview/parse_link.dart +++ b/lib/src/views/camera/share_image_editor/layers/link_preview/parse_link.dart @@ -8,13 +8,14 @@ import 'package:html/parser.dart'; import 'package:http/http.dart' as http; import 'package:twonly/src/utils/log.dart'; import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/base.dart'; -import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/html_parser.dart'; -import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/json_ld_parser.dart'; -import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/og_parser.dart'; -import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/other_parser.dart'; -import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/twitter_parser.dart'; +import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/html.parser.dart'; +import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/json_ld.parser.dart'; +import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/mastodon.parser.dart'; +import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/og.parser.dart'; +import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/other.parser.dart'; +import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/twitter.parser.dart'; import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/util.dart'; -import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/youtube_parser.dart'; +import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/youtube.parser.dart'; import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/utils.dart'; Future getMetadata(String link) async { @@ -81,7 +82,7 @@ Future getInfo( final document = responseToDocument(response); if (document == null) return info; - final data_ = _parse(document, url: url); + final data_ = _parse(document, url); return data_; } catch (error) { @@ -103,83 +104,44 @@ Document? responseToDocument(http.Response response) { return document; } -Metadata _parse(Document? document, {String? url}) { - final output = Metadata(); +Metadata _parse(Document? document, String url) { + final output = Metadata()..url = url; - final parsers = [ - _openGraph(document), - _twitterCard(document), - _youtubeCard(document), - _jsonLdSchema(document), - _htmlMeta(document), - _otherParser(document), + final allParsers = [ + // start with vendor specific to parse the vendor type + MastodonParser(document), + YoutubeParser(document, url), + TwitterParser(document, url), + + JsonLdParser(document), + OpenGraphParser(document), + HtmlMetaParser(document), + OtherParser(document), ]; - for (final p in parsers) { - if (p == null) continue; - - output.title ??= p.title; - output.desc ??= p.desc; - output.image ??= p.image; - output.siteName ??= p.siteName; - output.url ??= p.url ?? url; - - if (output.hasAllMetadata) break; - } - - final url_ = output.url ?? url; - final image = output.image; - if (url_ != null && image != null) { - output.image = Uri.parse(url_).resolve(image).toString(); + for (final parser in allParsers) { + try { + output.vendor ??= parser.vendor; + output.title ??= parser.title; + output.desc ??= parser.desc; + if (output.vendor == Vendor.twitterPosting) { + if (output.image == null) { + if (parser.image?.contains('/media/') ?? false) { + output.image ??= parser.image; + } + } + } else { + output.image ??= parser.image; + } + output.siteName ??= parser.siteName; + output.publishDate ??= parser.publishDate; + output.likeAction ??= parser.likeAction; + output.shareAction ??= parser.shareAction; + if (output.hasAllMetadata) break; + } catch (e) { + Log.error(e); + } } return output; } - -Metadata? _openGraph(Document? document) { - try { - return OpenGraphParser(document).parse(); - } catch (e) { - return null; - } -} - -Metadata? _htmlMeta(Document? document) { - try { - return HtmlMetaParser(document).parse(); - } catch (e) { - return null; - } -} - -Metadata? _jsonLdSchema(Document? document) { - try { - return JsonLdParser(document).parse(); - } catch (e) { - return null; - } -} - -Metadata? _youtubeCard(Document? document) { - try { - return YoutubeParser(document).parse(); - } catch (e) { - return null; - } -} - -Metadata? _twitterCard(Document? document) { - try { - return TwitterParser(document).parse(); - } catch (e) { - return null; - } -} - -Metadata? _otherParser(Document? document) { - try { - return OtherParser(document).parse(); - } catch (e) { - return null; - } -} diff --git a/lib/src/views/camera/share_image_editor/layers/link_preview/parser/base.dart b/lib/src/views/camera/share_image_editor/layers/link_preview/parser/base.dart index 37a4e0e..da3ba60 100644 --- a/lib/src/views/camera/share_image_editor/layers/link_preview/parser/base.dart +++ b/lib/src/views/camera/share_image_editor/layers/link_preview/parser/base.dart @@ -1,29 +1,29 @@ +enum Vendor { mastodonSocialMediaPosting, youtubeVideo, twitterPosting } + mixin BaseMetaInfo { + late String url; String? title; String? desc; String? image; - String? url; String? siteName; + Vendor? vendor; + + DateTime? publishDate; + int? likeAction; // https://schema.org/LikeAction + int? shareAction; // https://schema.org/ShareAction + /// Returns `true` if any parameter other than [url] is filled. bool get hasData => ((title?.isNotEmpty ?? false) && title != 'null') || ((desc?.isNotEmpty ?? false) && desc != 'null') || ((image?.isNotEmpty ?? false) && image != 'null'); - - Metadata parse() { - return Metadata() - ..title = title - ..desc = desc - ..image = image - ..url = url - ..siteName = siteName; - } } /// Container class for Metadata. class Metadata with BaseMetaInfo { + Metadata(); bool get hasAllMetadata { - return title != null && desc != null && image != null && url != null; + return title != null && desc != null && image != null; } } diff --git a/lib/src/views/camera/share_image_editor/layers/link_preview/parser/html_parser.dart b/lib/src/views/camera/share_image_editor/layers/link_preview/parser/html.parser.dart similarity index 95% rename from lib/src/views/camera/share_image_editor/layers/link_preview/parser/html_parser.dart rename to lib/src/views/camera/share_image_editor/layers/link_preview/parser/html.parser.dart index 78d53c4..b386ab6 100644 --- a/lib/src/views/camera/share_image_editor/layers/link_preview/parser/html_parser.dart +++ b/lib/src/views/camera/share_image_editor/layers/link_preview/parser/html.parser.dart @@ -34,7 +34,4 @@ class HtmlMetaParser with BaseMetaInfo { ?.querySelector("meta[name='site_name']") ?.attributes .get('content'); - - @override - String toString() => parse().toString(); } diff --git a/lib/src/views/camera/share_image_editor/layers/link_preview/parser/json_ld.parser.dart b/lib/src/views/camera/share_image_editor/layers/link_preview/parser/json_ld.parser.dart new file mode 100644 index 0000000..e946065 --- /dev/null +++ b/lib/src/views/camera/share_image_editor/layers/link_preview/parser/json_ld.parser.dart @@ -0,0 +1,98 @@ +import 'dart:convert'; + +import 'package:html/dom.dart'; +import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/base.dart'; +import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/og.parser.dart'; +import 'package:twonly/src/views/camera/share_image_editor/layers/link_preview/parser/util.dart'; + +/// Parses [Metadata] from `json-ld` data in `