diff --git a/dags/hivemind_etl_helpers/notion_etl.py b/dags/hivemind_etl_helpers/notion_etl.py
index 0b1ca673..8cf93211 100644
--- a/dags/hivemind_etl_helpers/notion_etl.py
+++ b/dags/hivemind_etl_helpers/notion_etl.py
@@ -1,6 +1,8 @@
+import copy
 import logging
 
 from hivemind_etl_helpers.src.db.notion.extractor import NotionExtractor
+from llama_index.core import Document
 from tc_hivemind_backend.ingest_qdrant import CustomIngestionPipeline
 
@@ -56,7 +58,8 @@ def process(
         documents = self.notion_extractor.extract(
             page_ids=page_ids, database_ids=database_ids
         )
-        self.ingestion_pipeline.run_pipeline(docs=documents)
+        transformed_docs = self._transform_documents(documents=documents)
+        self.ingestion_pipeline.run_pipeline(docs=transformed_docs)
 
     def process_page(self, page_id: str) -> None:
         """
@@ -71,7 +74,8 @@ def process_page(self, page_id: str) -> None:
             f"Processing page_id: {page_id}, of community id: {self.community_id}"
         )
         documents = self.notion_extractor.extract_from_pages(page_ids=[page_id])
-        self.ingestion_pipeline.run_pipeline(docs=documents)
+        transformed_docs = self._transform_documents(documents=documents)
+        self.ingestion_pipeline.run_pipeline(docs=transformed_docs)
 
     def process_database(self, database_id: str) -> None:
         """
@@ -86,4 +90,31 @@ def process_database(self, database_id: str) -> None:
             f"Processing database id: {database_id}, of community id: {self.community_id}"
         )
         documents = self.notion_extractor.extract_from_database(database_id=database_id)
-        self.ingestion_pipeline.run_pipeline(docs=documents)
+        transformed_docs = self._transform_documents(documents=documents)
+        self.ingestion_pipeline.run_pipeline(docs=transformed_docs)
+
+    def _transform_documents(self, documents: list[Document]) -> list[Document]:
+        """
+        transform extracted notion documents by inserting a url into their metadata
+
+        Parameters
+        ------------
+        documents : list[Document]
+            a list of extracted notion pages
+
+        Returns
+        ---------
+        documents : list[Document]
+            a list of documents, each with a url included in its metadata
+        """
+        # deep copy so the caller's documents are not mutated
+        transformed_docs: list[Document] = copy.deepcopy(documents)
+
+        for doc in transformed_docs:
+            page_id: str | None = doc.metadata.get("page_id")
+            if page_id is None:
+                doc.metadata["url"] = None
+            else:
+                doc.metadata["url"] = f"https://www.notion.so/{page_id}"
+
+        return transformed_docs
diff --git a/dags/hivemind_etl_helpers/src/db/discord/utils/transform_discord_raw_messges.py b/dags/hivemind_etl_helpers/src/db/discord/utils/transform_discord_raw_messges.py
index fbbd65b9..cf04ddbe 100644
--- a/dags/hivemind_etl_helpers/src/db/discord/utils/transform_discord_raw_messges.py
+++ b/dags/hivemind_etl_helpers/src/db/discord/utils/transform_discord_raw_messges.py
@@ -95,6 +95,10 @@ def prepare_document(
     reactions = message["reactions"]
     raw_content = message["content"]
 
+    message_id = message["messageId"]
+    channel_id = message["channelId"]
+    thread_id = message["threadId"]
+
     reaction_ids = prepare_raction_ids(reactions)
 
     mention_names: list[str]
@@ -161,10 +165,17 @@ def prepare_document(
     # always has length 1
    assert len(author_name) == 1, "Either None or multiple authors!"
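+    # build the message permalink: guild id / channel id / message id
+    # (for thread messages, the thread id takes the channel position)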
+ if thread_id is None: + url = f"https://discord.com/channels/{guild_id}/{channel_id}/{message_id}" + else: + url = f"https://discord.com/channels/{guild_id}/{thread_id}/{message_id}" + msg_meta_data = { "channel": message["channelName"], "date": message["createdDate"].strftime(DATE_FORMAT), "author_username": author_name[0], + "url": url, # always including the thread_name, if `None`, then it was a channel message "thread": message["threadName"], } @@ -234,6 +244,7 @@ def prepare_document( "replier_global_name", "replier_nickname", "role_mentions", + "url", ] doc.excluded_llm_metadata_keys = [ "author_nickname", @@ -250,6 +261,7 @@ def prepare_document( "replier_global_name", "replier_nickname", "role_mentions", + "url", ] else: doc = Document(text=content_url_updated) diff --git a/dags/hivemind_etl_helpers/src/db/discourse/fetch_raw_posts.py b/dags/hivemind_etl_helpers/src/db/discourse/fetch_raw_posts.py index 30b0017c..be579e8f 100644 --- a/dags/hivemind_etl_helpers/src/db/discourse/fetch_raw_posts.py +++ b/dags/hivemind_etl_helpers/src/db/discourse/fetch_raw_posts.py @@ -55,8 +55,10 @@ def fetch_raw_posts( author.username AS author_username, author.name AS author_name, t.title AS topic, + t.id AS topic_id, p.id AS postId, $forum_endpoint AS forum_endpoint, + p.postNumber as post_number, p.raw AS raw, p.createdAt AS createdAt, p.updatedAt AS updatedAt, diff --git a/dags/hivemind_etl_helpers/src/db/discourse/utils/transform_raw_to_documents.py b/dags/hivemind_etl_helpers/src/db/discourse/utils/transform_raw_to_documents.py index b9e4042a..56188eaf 100644 --- a/dags/hivemind_etl_helpers/src/db/discourse/utils/transform_raw_to_documents.py +++ b/dags/hivemind_etl_helpers/src/db/discourse/utils/transform_raw_to_documents.py @@ -33,12 +33,18 @@ def transform_raw_to_documents( doc: Document if not exclude_metadata: + forum_endpoint = post["forum_endpoint"] + topic_id = post["topic_id"] + post_number = post["post_number"] + + link = f"https://{forum_endpoint}/t/{topic_id}/{post_number}" + doc = Document( text=post["raw"], metadata={ "author_name": post["author_name"], "author_username": post["author_username"], - "forum_endpoint": post["forum_endpoint"], + "forum_endpoint": forum_endpoint, "createdAt": post["createdAt"], "updatedAt": post["updatedAt"], "postId": post["postId"], @@ -49,6 +55,7 @@ def transform_raw_to_documents( "liker_names": post["liker_names"], "replier_usernames": post["replier_usernames"], "replier_names": post["replier_names"], + "link": link, }, ) else: diff --git a/dags/hivemind_etl_helpers/src/db/gdrive/gdrive_loader.py b/dags/hivemind_etl_helpers/src/db/gdrive/gdrive_loader.py index a181388a..9ad91180 100644 --- a/dags/hivemind_etl_helpers/src/db/gdrive/gdrive_loader.py +++ b/dags/hivemind_etl_helpers/src/db/gdrive/gdrive_loader.py @@ -1,3 +1,4 @@ +import copy import logging import os from typing import List, Optional @@ -49,7 +50,9 @@ def load_data( documents.extend(self._load_from_files(file_ids)) if not documents: raise ValueError("One input at least must be given!") - return documents + + transformed_documents = self._transform_google_documents(documents) + return transformed_documents def _load_from_folders(self, folder_ids: List[str]): folders_data = [] @@ -108,3 +111,19 @@ def _load_google_drive_creds(self) -> tuple[str, str]: raise ValueError("`GOOGLE_CLIENT_SECRET` not found from env variables!") return client_id, client_secret + + def _transform_google_documents(self, documents: list[Document]) -> list[Document]: + """ + transform google extracted documents 
by inserting a url into their metadata
+        """
+        # copy so the input documents are not mutated
+        transformed_docs: list[Document] = copy.deepcopy(documents)
+
+        for doc in transformed_docs:
+            file_id: str | None = doc.metadata.get("file id")
+            if file_id is None:
+                doc.metadata["url"] = None
+            else:
+                doc.metadata["url"] = f"https://drive.google.com/file/d/{file_id}/view"
+
+        return transformed_docs
diff --git a/dags/hivemind_etl_helpers/src/db/github/extract/commit.py b/dags/hivemind_etl_helpers/src/db/github/extract/commit.py
index d3334599..f04a21c7 100644
--- a/dags/hivemind_etl_helpers/src/db/github/extract/commit.py
+++ b/dags/hivemind_etl_helpers/src/db/github/extract/commit.py
@@ -78,7 +78,7 @@ def _fetch_raw_commits(
         user_commiter.login AS committer_name,
         co.`commit.message` AS message,
         co.`commit.url` AS api_url,
-        co.`parents.0.html_url` AS html_url,
+        co.`parents.0.html_url` AS url,
         co.repository_id AS repository_id,
         repo.full_name AS repository_name,
         co.sha AS sha,
diff --git a/dags/hivemind_etl_helpers/src/db/github/schema/commit.py b/dags/hivemind_etl_helpers/src/db/github/schema/commit.py
index 1b07bea6..132a44e5 100644
--- a/dags/hivemind_etl_helpers/src/db/github/schema/commit.py
+++ b/dags/hivemind_etl_helpers/src/db/github/schema/commit.py
@@ -8,7 +8,7 @@ def __init__(
         committer_name: str,
         message: str,
         api_url: str,
-        html_url: str,
+        url: str,
         repository_id: int,
         repository_name: str,
         sha: str,
@@ -26,7 +26,7 @@ def __init__(
         self.committer_name = committer_name
         self.message = message
         self.api_url = api_url
-        self.html_url = html_url
+        self.url = url
         self.repository_id = repository_id
         self.repository_name = repository_name
         self.sha = sha
@@ -42,7 +42,7 @@ def from_dict(cls, data: dict[str, str | int | None]) -> "GitHubCommit":
             committer_name=data["committer_name"],  # type: ignore
             message=data["message"],  # type: ignore
             api_url=data["api_url"],  # type: ignore
-            html_url=data["html_url"],  # type: ignore
+            url=data["url"],  # type: ignore
             repository_id=data["repository_id"],  # type: ignore
             repository_name=data["repository_name"],  # type: ignore
             sha=data["sha"],  # type: ignore
@@ -58,7 +58,7 @@ def to_dict(self) -> dict[str, str | int | None]:
             "committer_name": self.committer_name,
             "message": self.message,
             "api_url": self.api_url,
-            "html_url": self.html_url,
+            "url": self.url,
             "repository_id": self.repository_id,
             "repository_name": self.repository_name,
             "sha": self.sha,
diff --git a/dags/hivemind_etl_helpers/src/db/github/transform/commits.py b/dags/hivemind_etl_helpers/src/db/github/transform/commits.py
index c52544f8..87743ed3 100644
--- a/dags/hivemind_etl_helpers/src/db/github/transform/commits.py
+++ b/dags/hivemind_etl_helpers/src/db/github/transform/commits.py
@@ -27,7 +27,7 @@ def transform_commits(data: list[GitHubCommit]) -> list[Document]:
             metadata=metadata,
             # all metadata to be excluded from embedding model
             excluded_embed_metadata_keys=list(metadata.keys()),
-            excluded_llm_metadata_keys=["sha", "api_url", "html_url", "verification"],
+            excluded_llm_metadata_keys=["sha", "api_url", "url", "verification"],
         )
         transformed_commits.append(document)
diff --git a/dags/hivemind_etl_helpers/src/db/telegram/transform/messages.py b/dags/hivemind_etl_helpers/src/db/telegram/transform/messages.py
index fe2423de..e8e0f25d 100644
--- a/dags/hivemind_etl_helpers/src/db/telegram/transform/messages.py
+++ b/dags/hivemind_etl_helpers/src/db/telegram/transform/messages.py
@@ -23,6 +23,9 @@ def transform(self, messages: list[TelegramMessagesModel]) -> list[Document]:
         """
         transformed_docs: list[Document] = []
 
+        # within the generated links, the "-100" prefix of chat_id is removed
+        chat_id = str(self.chat_id).removeprefix("-100")
+
         for message in messages:
             document = Document(
                 text=message.message_text,
@@ -35,6 +38,7 @@
                     "replies": message.repliers,
                     "reactors": message.reactors,
                     "chat_name": self.chat_name,
+                    "url": f"https://t.me/c/{chat_id}/{message.message_id}",
                 },
                 excluded_embed_metadata_keys=[
                     "author",
@@ -44,6 +48,7 @@
                     "replies",
                     "reactors",
                     "chat_name",
+                    "url",
                 ],
                 excluded_llm_metadata_keys=[
                     "createdAt",
@@ -52,6 +57,7 @@
                     "replies",
                     "reactors",
                     "chat_name",
+                    "url",
                 ],
             )
             transformed_docs.append(document)
diff --git a/dags/hivemind_etl_helpers/tests/integration/test_discord_prepare_document_from_db.py b/dags/hivemind_etl_helpers/tests/integration/test_discord_prepare_document_from_db.py
index e72a2ca4..f92ce950 100644
--- a/dags/hivemind_etl_helpers/tests/integration/test_discord_prepare_document_from_db.py
+++ b/dags/hivemind_etl_helpers/tests/integration/test_discord_prepare_document_from_db.py
@@ -105,7 +105,7 @@ def test_transform_two_data(self):
             "reactions": [],
             "replied_user": None,
             "createdDate": datetime(2023, 5, 1),
-            "messageId": str(np.random.randint(1000000, 9999999)),
+            "messageId": "10000000000",
             "channelId": channels[0],
             "channelName": "channel1",
             "threadId": None,
@@ -123,7 +123,7 @@
             "reactions": [],
             "replied_user": "114",
             "createdDate": datetime(2023, 5, 2),
-            "messageId": str(np.random.randint(1000000, 9999999)),
+            "messageId": "10000000001",
             "channelId": channels[1],
             "channelName": "channel2",
             "threadId": None,
@@ -141,7 +141,7 @@
             "reactions": [],
             "replied_user": "114",
             "createdDate": datetime(2023, 5, 2),
-            "messageId": str(np.random.randint(1000000, 9999999)),
+            "messageId": "10000000002",
             "channelId": channels[1],
             "channelName": "channel2",
             "threadId": "88888",
@@ -159,7 +159,7 @@
             "reactions": [],
             "replied_user": None,
             "createdDate": datetime(2023, 5, 8),
-            "messageId": str(np.random.randint(1000000, 9999999)),
+            "messageId": "10000000003",
             "channelId": channels[0],
             "channelName": "channel1",
             "threadId": None,
@@ -179,7 +179,7 @@
             "reactions": [],
             "replied_user": None,
             "createdDate": datetime(2023, 5, 8),
-            "messageId": str(np.random.randint(1000000, 9999999)),
+            "messageId": "10000000004",
             "channelId": "734738382",
             "channelName": "channel1",
             "threadId": None,
@@ -271,6 +271,7 @@
             "author_username": "user1",
             "author_global_name": "user1_GlobalName",
             "thread": None,
+            "url": "https://discord.com/channels/1234/111111/10000000000",
         }
 
         expected_metadata_1 = {
@@ -284,6 +285,7 @@
             "replier_username": "user4",
             "replier_global_name": "user4_GlobalName",
             "thread": None,
+            "url": "https://discord.com/channels/1234/22222/10000000001",
         }
 
         expected_metadata_2 = {
@@ -298,6 +300,7 @@
             "replier_global_name": "user4_GlobalName",
             "thread": "example_thread1",
             "role_mentions": ["role1"],
+            "url": "https://discord.com/channels/1234/88888/10000000002",
         }
 
         expected_metadata_3 = {
@@ -307,6 +310,7 @@
             "author_global_name": "user1_GlobalName",
             "url_reference": {"[URL0]": "https://www.google.com"},
             "thread": None,
+            "url":
"https://discord.com/channels/1234/111111/10000000003", } print(documents[0].text) self.assertDictEqual(documents[0].metadata, expected_metadata_0) diff --git a/dags/hivemind_etl_helpers/tests/integration/test_discord_prepare_llama.py b/dags/hivemind_etl_helpers/tests/integration/test_discord_prepare_llama.py index d6710b53..3c5fa5af 100644 --- a/dags/hivemind_etl_helpers/tests/integration/test_discord_prepare_llama.py +++ b/dags/hivemind_etl_helpers/tests/integration/test_discord_prepare_llama.py @@ -168,6 +168,7 @@ def test_transform_two_data(self): "author_username": "user1", "author_global_name": "user1_GlobalName", "author_nickname": "user1_nickname", + "url": f"https://discord.com/channels/{guild_id}/1313130/1111111110", "thread": None, } @@ -180,6 +181,7 @@ def test_transform_two_data(self): "mention_global_names": ["user3_GlobalName", "user4_GlobalName"], "replier_username": "user4", "replier_global_name": "user4_GlobalName", + "url": f"https://discord.com/channels/{guild_id}/1313131/1111111111", "thread": None, } @@ -192,6 +194,7 @@ def test_transform_two_data(self): "mention_global_names": ["user3_GlobalName", "user4_GlobalName"], "replier_username": "user4", "replier_global_name": "user4_GlobalName", + "url": f"https://discord.com/channels/{guild_id}/88888/1111111112", "thread": "example_thread1", "role_mentions": ["role1"], } @@ -203,6 +206,7 @@ def test_transform_two_data(self): "author_global_name": "user1_GlobalName", "author_nickname": "user1_nickname", "url_reference": {"[URL0]": "https://www.google.com"}, + "url": f"https://discord.com/channels/{guild_id}/1313133/1111111113", "thread": None, } diff --git a/dags/hivemind_etl_helpers/tests/integration/test_discourse_fetch_posts.py b/dags/hivemind_etl_helpers/tests/integration/test_discourse_fetch_posts.py index c304ac71..bb75d18b 100644 --- a/dags/hivemind_etl_helpers/tests/integration/test_discourse_fetch_posts.py +++ b/dags/hivemind_etl_helpers/tests/integration/test_discourse_fetch_posts.py @@ -55,7 +55,8 @@ def test_fetch_some_data_without_from_date(self): p.topicId = 1, p.id = 100, p.createdAt = '2022-01-01T00:00:00.000Z', - p.updatedAt = '2022-01-01T01:00:00.000Z' + p.updatedAt = '2022-01-01T01:00:00.000Z', + p.postNumber = 1.0 WITH p CREATE (a:DiscourseUser) -[:POSTED]->(p) SET @@ -83,7 +84,8 @@ def test_fetch_some_data_without_from_date(self): p.topicId = 2, p.id = 101, p.createdAt = '2022-01-01T00:01:00.000Z', - p.updatedAt = '2022-01-01T01:01:00.000Z' + p.updatedAt = '2022-01-01T01:01:00.000Z', + p.postNumber = 2.0 WITH p CREATE (a:DiscourseUser) -[:POSTED]->(p) SET @@ -119,6 +121,7 @@ def test_fetch_some_data_without_from_date(self): if data["author_username"] == "user#1": self.assertEqual(data["author_name"], "user1") self.assertEqual(data["topic"], "topic#1") + self.assertEqual(data["topic_id"], 1) self.assertEqual(data["createdAt"], "2022-01-01T00:00:00.000Z") self.assertEqual(data["updatedAt"], "2022-01-01T01:00:00.000Z") self.assertEqual(data["authorTrustLevel"], 4) @@ -130,9 +133,11 @@ def test_fetch_some_data_without_from_date(self): self.assertEqual(data["replier_usernames"], ["user#2"]) self.assertEqual(data["replier_names"], ["user2"]) self.assertEqual(data["forum_endpoint"], "wwwdwadeswdpoi123") + self.assertEqual(data["post_number"], 1.0) elif data["author_username"] == "user#2": self.assertEqual(data["author_name"], "user2") self.assertEqual(data["topic"], "topic#2") + self.assertEqual(data["topic_id"], 2) self.assertEqual(data["createdAt"], "2022-01-01T00:01:00.000Z") 
self.assertEqual(data["updatedAt"], "2022-01-01T01:01:00.000Z") self.assertEqual(data["raw"], "texttexttext of post 2") @@ -144,6 +149,7 @@ def test_fetch_some_data_without_from_date(self): self.assertEqual(data["replier_usernames"], []) self.assertEqual(data["replier_names"], []) self.assertEqual(data["forum_endpoint"], "wwwdwadeswdpoi123") + self.assertEqual(data["post_number"], 2.0) else: raise IndexError("It shouldn't get here!") @@ -166,7 +172,8 @@ def test_fetch_some_data_with_from_date(self): p.topicId = 1, p.id = 100, p.createdAt = '2022-01-01T00:00:00.000Z', - p.updatedAt = '2022-01-01T01:00:00.000Z' + p.updatedAt = '2022-01-01T01:00:00.000Z', + p.postNumber = 1.0 WITH p CREATE (a:DiscourseUser) -[:POSTED]->(p) SET @@ -194,7 +201,8 @@ def test_fetch_some_data_with_from_date(self): p.topicId = 2, p.id = 101, p.createdAt = '2022-05-01T00:01:00.000Z', - p.updatedAt = '2022-05-01T01:01:00.000Z' + p.updatedAt = '2022-05-01T01:01:00.000Z', + p.postNumber = 2.0 WITH p CREATE (a:DiscourseUser) -[:POSTED]->(p) SET @@ -230,6 +238,7 @@ def test_fetch_some_data_with_from_date(self): if data["author_username"] == "user#2": self.assertEqual(data["author_name"], "user2") self.assertEqual(data["topic"], "topic#2") + self.assertEqual(data["post_number"], 2.0) self.assertEqual(data["createdAt"], "2022-05-01T00:01:00.000Z") self.assertEqual(data["updatedAt"], "2022-05-01T01:01:00.000Z") self.assertEqual(data["raw"], "texttexttext of post 2") diff --git a/dags/hivemind_etl_helpers/tests/integration/test_gdrive_loader.py b/dags/hivemind_etl_helpers/tests/integration/test_gdrive_loader.py index 6e2f8818..7f99b489 100644 --- a/dags/hivemind_etl_helpers/tests/integration/test_gdrive_loader.py +++ b/dags/hivemind_etl_helpers/tests/integration/test_gdrive_loader.py @@ -118,8 +118,33 @@ def test_load_by_file_id(self, mock_load_data): loader = GoogleDriveLoader(self.refresh_token) result = loader.load_data(file_ids=file_ids) + expected_results = [ + Document( + id_="file_id_1.docx", + metadata={ + "file_name": "qwertU10p2.docx", + "file id": "qwertU10p2", + "author": "Jacku", + "file name": "Option", + "url": "https://drive.google.com/file/d/qwertU10p2/view", + }, + relationships={}, + text="Option 1: Keep it super casual", + ), + Document( + id_="file_id_2.docx", + metadata={ + "file_name": "qwertU10p3.docx", + "file id": "qwertU10p3", + "author": "Jacku", + "file name": "Option", + "url": "https://drive.google.com/file/d/qwertU10p3/view", + }, + text="Option 1: Keep it super casual", + ), + ] self.assertEqual(len(result), 2) - self.assertEqual(result, mock_data) + self.assertEqual(result, expected_results) @patch.object(GoogleDriveReader, "load_data") def test_load_from_folders_exception(self, mock_reader): @@ -255,3 +280,119 @@ def test__load_by_drive_id(self, mock_load_data): for i in range(4): self.assertEqual(result[i].id_, mock_data[i % 2].id_) self.assertEqual(len(result), 4) + + def test_transform_single_document(self): + loader = GoogleDriveLoader(refresh_token=self.refresh_token) + + documents = [ + Document( + doc_id=1, + text="test", + metadata={ + "file id": "file_1", + "author": "author_1", + "file name": "file_name_1", + "mime type": "mime", + "created at": "date", + "modified at": "modified", + }, + ) + ] + transformed_docs = loader._transform_google_documents(documents=documents) + + self.assertEqual(len(transformed_docs), 1) + self.assertEqual( + transformed_docs[0].metadata, + { + "file id": "file_1", + "author": "author_1", + "file name": "file_name_1", + "mime type": "mime", + "created 
at": "date", + "modified at": "modified", + "url": "https://drive.google.com/file/d/file_1/view", + }, + ) + + def test_transform_multiple_document(self): + loader = GoogleDriveLoader(refresh_token=self.refresh_token) + + documents = [ + Document( + doc_id=1, + text="test", + metadata={ + "file id": "file_1", + "author": "author_1", + "file name": "file_name_1", + "mime type": "mime", + "created at": "date", + "modified at": "modified", + }, + ), + Document( + doc_id=2, + text="test", + metadata={ + "file id": "file_2", + "author": "author_2", + "file name": "file_name_2", + "mime type": "mime", + "created at": "date", + "modified at": "modified", + }, + ), + Document( + doc_id=3, + text="test", + metadata={ + "file id": "file_3", + "author": "author_3", + "file name": "file_name_3", + "mime type": "mime", + "created at": "date", + "modified at": "modified", + }, + ), + ] + transformed_docs = loader._transform_google_documents(documents=documents) + + self.assertEqual(len(transformed_docs), 3) + self.assertEqual( + transformed_docs[0].metadata, + { + "file id": "file_1", + "author": "author_1", + "file name": "file_name_1", + "mime type": "mime", + "created at": "date", + "modified at": "modified", + "url": "https://drive.google.com/file/d/file_1/view", + }, + ) + + self.assertEqual( + transformed_docs[1].metadata, + { + "file id": "file_2", + "author": "author_2", + "file name": "file_name_2", + "mime type": "mime", + "created at": "date", + "modified at": "modified", + "url": "https://drive.google.com/file/d/file_2/view", + }, + ) + + self.assertEqual( + transformed_docs[2].metadata, + { + "file id": "file_3", + "author": "author_3", + "file name": "file_name_3", + "mime type": "mime", + "created at": "date", + "modified at": "modified", + "url": "https://drive.google.com/file/d/file_3/view", + }, + ) diff --git a/dags/hivemind_etl_helpers/tests/integration/test_github_etl_fetch_commits.py b/dags/hivemind_etl_helpers/tests/integration/test_github_etl_fetch_commits.py index f68ef1f2..d81aa7c4 100644 --- a/dags/hivemind_etl_helpers/tests/integration/test_github_etl_fetch_commits.py +++ b/dags/hivemind_etl_helpers/tests/integration/test_github_etl_fetch_commits.py @@ -62,7 +62,7 @@ def test_get_single_commit_single_repo_no_from_date(self): self.assertEqual(commits[0].committer_name, "author #1") self.assertEqual(commits[0].message, "Issue #1 is resolved!") self.assertEqual(commits[0].api_url, "https://api.sample_url_for_commit.html") - self.assertEqual(commits[0].html_url, "https://sample_url_for_commit.html") + self.assertEqual(commits[0].url, "https://sample_url_for_commit.html") self.assertEqual(commits[0].repository_id, 123) self.assertEqual(commits[0].repository_name, "Org/SampleRepo") self.assertEqual(commits[0].sha, "sha#1111") @@ -105,7 +105,7 @@ def test_get_single_commit_single_repo_no_from_date_no_commiter(self): self.assertEqual(commits[0].committer_name, None) self.assertEqual(commits[0].message, "Issue #1 is resolved!") self.assertEqual(commits[0].api_url, "https://api.sample_url_for_commit.html") - self.assertEqual(commits[0].html_url, "https://sample_url_for_commit.html") + self.assertEqual(commits[0].url, "https://sample_url_for_commit.html") self.assertEqual(commits[0].repository_id, 123) self.assertEqual(commits[0].repository_name, "Org/SampleRepo") self.assertEqual(commits[0].sha, "sha#1111") @@ -150,7 +150,7 @@ def test_get_single_commit_single_repo_with_from_date(self): self.assertEqual(commits[0].author_name, "author #2") self.assertEqual(commits[0].message, "Issue #1 
is resolved!") self.assertEqual(commits[0].api_url, "https://api.sample_url_for_commit.html") - self.assertEqual(commits[0].html_url, "https://sample_url_for_commit.html") + self.assertEqual(commits[0].url, "https://sample_url_for_commit.html") self.assertEqual(commits[0].repository_id, 123) self.assertEqual(commits[0].repository_name, "Org/SampleRepo2") self.assertEqual(commits[0].sha, "sha#1111") @@ -228,7 +228,7 @@ def test_get_multiple_commit_multi_repo_with_from_date_filter(self): self.assertEqual(commits[0].committer_name, "author #1") self.assertEqual(commits[0].message, "Issue #1 is resolved!") self.assertEqual(commits[0].api_url, "https://api.sample_url_for_commit.html") - self.assertEqual(commits[0].html_url, "https://sample_url_for_commit.html") + self.assertEqual(commits[0].url, "https://sample_url_for_commit.html") self.assertEqual(commits[0].repository_id, 123) self.assertEqual(commits[0].repository_name, "Org/SampleRepo2") self.assertEqual(commits[0].sha, "sha#1111") @@ -272,7 +272,7 @@ def test_fetch_commit_no_related_pr(self): self.assertEqual(commits[0].committer_name, None) self.assertEqual(commits[0].message, "Issue #1 is resolved!") self.assertEqual(commits[0].api_url, "https://api.sample_url_for_commit.html") - self.assertEqual(commits[0].html_url, "https://sample_url_for_commit.html") + self.assertEqual(commits[0].url, "https://sample_url_for_commit.html") self.assertEqual(commits[0].repository_id, 123) self.assertEqual(commits[0].repository_name, "Org/SampleRepo") self.assertEqual(commits[0].sha, "sha#1111") diff --git a/dags/hivemind_etl_helpers/tests/integration/test_github_etl_fetch_raw_commits.py b/dags/hivemind_etl_helpers/tests/integration/test_github_etl_fetch_raw_commits.py index be0f628d..3b991a8d 100644 --- a/dags/hivemind_etl_helpers/tests/integration/test_github_etl_fetch_raw_commits.py +++ b/dags/hivemind_etl_helpers/tests/integration/test_github_etl_fetch_raw_commits.py @@ -62,7 +62,7 @@ def test_get_single_commit_single_repo_no_from_date(self): self.assertEqual( commits[0]["api_url"], "https://api.sample_url_for_commit.html" ) - self.assertEqual(commits[0]["html_url"], "https://sample_url_for_commit.html") + self.assertEqual(commits[0]["url"], "https://sample_url_for_commit.html") self.assertEqual(commits[0]["repository_id"], 123) self.assertEqual(commits[0]["repository_name"], "Org/SampleRepo") self.assertEqual(commits[0]["sha"], "sha#1111") @@ -106,7 +106,7 @@ def test_get_single_commit_single_repo_with_from_date(self): self.assertEqual( commits[0]["api_url"], "https://api.sample_url_for_commit.html" ) - self.assertEqual(commits[0]["html_url"], "https://sample_url_for_commit.html") + self.assertEqual(commits[0]["url"], "https://sample_url_for_commit.html") self.assertEqual(commits[0]["repository_id"], 123) self.assertEqual(commits[0]["repository_name"], "Org/SampleRepo2") self.assertEqual(commits[0]["sha"], "sha#1111") diff --git a/dags/hivemind_etl_helpers/tests/integration/test_github_transformation.py b/dags/hivemind_etl_helpers/tests/integration/test_github_transformation.py index 1f200073..b7e80090 100644 --- a/dags/hivemind_etl_helpers/tests/integration/test_github_transformation.py +++ b/dags/hivemind_etl_helpers/tests/integration/test_github_transformation.py @@ -83,7 +83,7 @@ def test_commit_transformation(self): sha="sha#1000000", created_at=datetime(2023, 11, 1).strftime("%Y-%m-%dT%H:%M:%SZ"), latest_saved_at=datetime(2023, 12, 1, 1).strftime("%Y-%m-%dT%H:%M:%SZ"), - html_url="https://github.com/repo/commit/1", + 
url="https://github.com/repo/commit/1", api_url="https://api.github.com/repo/commit/1", repository_id=123, repository_name="SampleRepo", @@ -104,7 +104,7 @@ def test_commit_transformation(self): "sha": "sha#1000000", "created_at": "2023-11-01 00:00:00", "latest_saved_at": "2023-12-01 01:00:00", - "html_url": "https://github.com/repo/commit/1", + "url": "https://github.com/repo/commit/1", "api_url": "https://api.github.com/repo/commit/1", "repository_id": 123, "repository_name": "SampleRepo", diff --git a/dags/hivemind_etl_helpers/tests/unit/test_github_transform_commits.py b/dags/hivemind_etl_helpers/tests/unit/test_github_transform_commits.py index a41de6de..27150b70 100644 --- a/dags/hivemind_etl_helpers/tests/unit/test_github_transform_commits.py +++ b/dags/hivemind_etl_helpers/tests/unit/test_github_transform_commits.py @@ -20,7 +20,7 @@ def test_github_single_document(self): sha="sha#1000000", created_at=datetime(2023, 11, 1).strftime("%Y-%m-%dT%H:%M:%SZ"), latest_saved_at=datetime(2023, 12, 1, 1).strftime("%Y-%m-%dT%H:%M:%SZ"), - html_url="https://github.com/repo/commit/1", + url="https://github.com/repo/commit/1", api_url="https://api.github.com/repo/commit/1", repository_id=123, repository_name="SampleRepo", @@ -42,7 +42,7 @@ def test_github_single_document(self): "sha": "sha#1000000", "created_at": "2023-11-01 00:00:00", "latest_saved_at": "2023-12-01 01:00:00", - "html_url": "https://github.com/repo/commit/1", + "url": "https://github.com/repo/commit/1", "api_url": "https://api.github.com/repo/commit/1", "repository_id": 123, "repository_name": "SampleRepo", @@ -61,7 +61,7 @@ def test_multiple_documents(self): sha="sha#1000000", created_at=datetime(2023, 11, 1).strftime("%Y-%m-%dT%H:%M:%SZ"), latest_saved_at=datetime(2023, 12, 1, 1).strftime("%Y-%m-%dT%H:%M:%SZ"), - html_url="https://github.com/repo/commit/1", + url="https://github.com/repo/commit/1", api_url="https://api.github.com/repo/commit/1", repository_id=123, repository_name="SampleRepo", @@ -75,7 +75,7 @@ def test_multiple_documents(self): sha="sha#1000001", created_at=datetime(2023, 11, 2).strftime("%Y-%m-%dT%H:%M:%SZ"), latest_saved_at=datetime(2023, 12, 2, 1).strftime("%Y-%m-%dT%H:%M:%SZ"), - html_url="https://github.com/repo/commit/2", + url="https://github.com/repo/commit/2", api_url="https://api.github.com/repo/commit/2", repository_id=123, repository_name="SampleRepo", @@ -88,7 +88,7 @@ def test_multiple_documents(self): sha="sha#1000002", created_at=datetime(2023, 11, 3).strftime("%Y-%m-%dT%H:%M:%SZ"), latest_saved_at=datetime(2023, 12, 3, 1).strftime("%Y-%m-%dT%H:%M:%SZ"), - html_url="https://github.com/repo/commit/3", + url="https://github.com/repo/commit/3", api_url="https://api.github.com/repo/commit/3", repository_id=126, repository_name="SampleRepo#6", @@ -114,7 +114,7 @@ def test_multiple_documents(self): "sha": "sha#1000000", "created_at": "2023-11-01 00:00:00", "latest_saved_at": "2023-12-01 01:00:00", - "html_url": "https://github.com/repo/commit/1", + "url": "https://github.com/repo/commit/1", "api_url": "https://api.github.com/repo/commit/1", "repository_id": 123, "repository_name": "SampleRepo", @@ -132,7 +132,7 @@ def test_multiple_documents(self): "sha": "sha#1000001", "created_at": "2023-11-02 00:00:00", "latest_saved_at": "2023-12-02 01:00:00", - "html_url": "https://github.com/repo/commit/2", + "url": "https://github.com/repo/commit/2", "api_url": "https://api.github.com/repo/commit/2", "repository_id": 123, "repository_name": "SampleRepo", @@ -150,7 +150,7 @@ def test_multiple_documents(self): 
"sha": "sha#1000002", "created_at": "2023-11-03 00:00:00", "latest_saved_at": "2023-12-03 01:00:00", - "html_url": "https://github.com/repo/commit/3", + "url": "https://github.com/repo/commit/3", "api_url": "https://api.github.com/repo/commit/3", "repository_id": 126, "repository_name": "SampleRepo#6",