This repository was archived by the owner on Mar 25, 2024. It is now read-only.

Commit 10176a4
Committed Nov 21, 2021

Reformat all code
Adapt scraping to follow google maps

1 parent 0caaec2 · commit 10176a4

13 files changed: +802 −600 lines changed

‎.gitignore

+2 −1 lines

@@ -2,4 +2,5 @@
 emails.txt
 geckodriver.log
 lib/__pycache__/
-venv
+venv
+*test*

‎assets/banner.txt

−1 line

@@ -5,4 +5,3 @@
 ██║╚██╔╝██║██╔══██║██║██║ ██╔══╝ ██║ ██║██║ ██║██║ ██╔══╝
 ██║ ╚═╝ ██║██║ ██║██║███████╗██║ ╚██████╔╝╚██████╔╝███████╗███████╗
 ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝╚══════╝╚═╝ ╚═════╝ ╚═════╝ ╚══════╝╚══════╝
-

‎lib/digger.py

+29 lines (new file)

from lib.user import User
from lib.google_people_api import GooglePeopleApi
from time import sleep


class Digger:
    def __init__(self, mails, browser):
        self.users = None
        self._create_users(mails=mails, browser=browser)

    def _create_users(self, mails, browser):
        self.gpa = GooglePeopleApi(mails=mails)
        if not self.gpa.connected:
            self.users = [User(mail=mail, browser=browser) for mail in mails]
            return

        users_info = []
        while True:
            data = self.gpa.get_data()
            users_info.extend(data)

            if len(self.gpa.mails) == 0:
                break
            sleep(2)

        self.users = [User(browser=browser, **user_info) for user_info in users_info]

    def as_dict(self):
        return [user.as_dict() for user in self.users]
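
Not part of the commit, but for orientation: Digger is the new aggregation entry point that mailfogle.py (further down this diff) drives. A minimal usage sketch of that flow; the target address and output path are made-up placeholders:

import json

from lib.digger import Digger

mails = ["someone@example.com"]  # placeholder target list

# Resolves each address through the Google People API when credentials are
# available, then scrapes Maps and YouTube for every resolved user.
digger = Digger(mails, browser="firefox")

# as_dict() returns one dict per user, ready to serialize.
with open("./output.json", "w") as f:
    json.dump(digger.as_dict(), f, indent=2, ensure_ascii=False)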

‎lib/googlePeopleAPI.py

-48
This file was deleted.

‎lib/google_people_api.py

+110 lines (new file)

import os.path

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build


class GooglePeopleApi:
    def __init__(self, mails):
        self.scopes = ["https://www.googleapis.com/auth/contacts"]
        self.creds = None
        self.service = None
        self.mails = mails
        try:
            self._connect()
            print("Connected to Google people API")
            self.connected = True
            self._importContacts()
        except:
            print("Cannot connect to Google people API")
            print("Retry after deleting 'token.json'")
            self.connected = False

    def _connect(self):
        # Check if 'token.json' exist or not
        if os.path.exists("token.json"):
            self.creds = Credentials.from_authorized_user_file(
                "token.json", self.scopes
            )

        # If there are no (valid) credentials available, let the user log in
        if not self.creds or not self.creds.valid:
            if not self.creds:
                flow = InstalledAppFlow.from_client_secrets_file(
                    "credentials.json", self.scopes
                )
                self.creds = flow.run_local_server(port=0)
            elif self.creds.expired and self.creds.refresh_token:
                self.creds.refresh(Request())

            # Save the credentials for the next run
            with open("token.json", "w") as token:
                token.write(self.creds.to_json())

        # Create service
        self.service = build("people", "v1", credentials=self.creds)

    def _importContacts(self):
        # Import the mail as a contact to the account
        for mail in self.mails:
            self.service.people().createContact(
                body={"emailAddresses": [{"value": mail}]}
            ).execute()

    def _downloadContacts(self):
        results = (
            self.service.people()
            .connections()
            .list(
                pageSize=1000,
                resourceName="people/me",
                personFields="names,photos,emailAddresses,metadata",
            )
            .execute()
        )
        return results.get("connections", [])

    def _deleteContact(self, name):
        # Sometimes the google API has trouble deleting the contact
        try:
            self.service.people().deleteContact(resourceName=name).execute()
        # Start again until it succeeds
        except:
            self._deleteContact(name)

    def get_data(self):
        connections = self._downloadContacts()
        connections = list(
            filter(
                lambda contact: "emailAddresses" in contact.keys()
                and contact["emailAddresses"][0]["value"] in self.mails,
                connections,
            )
        )

        users_data = []
        for person in connections:
            user = {}
            mail = person["emailAddresses"][0]["value"]

            if mail not in self.mails:
                continue

            user["mail"] = mail

            if len(person["metadata"]["sources"]) > 1:
                sources = person["metadata"]["sources"][1]
                user["user_type"] = sources["profileMetadata"]["userTypes"][0].replace(
                    "_", " "
                )
                user["google_ID"] = sources["id"]
                user["profile_pic"] = person["photos"][0]["url"]

            self._deleteContact(person["resourceName"])
            self.mails.remove(user["mail"])

            users_data.append(user)

        return users_data
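
For context, the lookup trick here is to temporarily add each target address as a contact, poll the People API until every contact resolves, then delete the contacts again. A rough standalone sketch of that loop, assuming a valid credentials.json OAuth client sits in the working directory (the address is a placeholder):

from time import sleep

from lib.google_people_api import GooglePeopleApi

gpa = GooglePeopleApi(mails=["someone@example.com"])

users_info = []
if gpa.connected:
    # get_data() removes every resolved address from gpa.mails,
    # so keep polling until the list is empty.
    while gpa.mails:
        users_info.extend(gpa.get_data())
        sleep(2)

# Each entry carries mail, user_type, google_ID and profile_pic when available.
print(users_info)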

‎lib/maps.py

-272
This file was deleted.

‎lib/maps_scraper.py

+390 lines (new file)

import requests

from bs4 import BeautifulSoup
from lib.selenium_wrapper import SeleniumWrapper
from time import sleep


class MapsScraper:

    # Global variable of the seconds to wait to be sure that content is loaded
    DELAY = 5
    # Set cookie for Google consent and "User Agent"
    CONSENT = "YES+cb.20210622-13-p0.fr+F+528"
    USER_AGENT = (
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0"
    )

    def __init__(self, google_ID, browser):
        self.browser = browser

        self.exist = None
        self.is_public = None

        self.name = None
        self.local_guide = None
        self.contributions = []
        self.reviews = []
        self.medias = {}

        if google_ID:
            self.url = f"https://www.google.com/maps/contrib/{google_ID}"
            self._scrap_data()

    def as_dict(self):
        if not self.exist:
            return None

        data = {"url": self.url}
        if self.local_guide:
            data["local_guide"] = self.local_guide
        if self.contributions:
            data["contributions"] = self.contributions
        if self.reviews:
            data["reviews"] = self.reviews
        if self.medias:
            data["medias"] = self.medias
        return data

    def _scrap_data(self):

        self._request()

        if self.is_public:
            self.driver = SeleniumWrapper(self.browser).driver
            self._selenium_scrap()
            self.driver.quit()

    def _request(self):
        # Setting up the request
        session = requests.Session()
        session.headers.update({"User-Agent": self.USER_AGENT})
        consent_cookie = requests.cookies.create_cookie(
            domain=".google.com", name="CONSENT", value=self.CONSENT
        )
        session.cookies.set_cookie(consent_cookie)

        # Making the request
        maps_request = session.get(self.url)
        if maps_request.status_code == 404:
            self.exist = False
        elif maps_request.status_code == 200:
            self.exist = True
            self._minimal_scrap(BeautifulSoup(maps_request.text, "html.parser"))

    def _minimal_scrap(self, html):
        title = html.find("meta", attrs={"property": "og:title"})["content"].split(
            "by "
        )
        if len(title) != 1:
            self.is_public = True
            self.name = title[1]
        else:
            self.is_public = False
            return

        description = html.find("meta", attrs={"property": "og:description"})[
            "content"
        ].split(" Local Guide | ")
        if len(description) != 1:
            self.local_guide = {
                "level": int(description[0].replace("Level ", "")),
                "points": int(description[1].replace(" Points", "").replace(",", "")),
            }
        else:
            self.contributions = int(
                description[0]
                .replace(" Contributions", "")
                .replace(" Contribution", "")
            )

    def selenium_scroll(self, here):
        # Define end of scrolling
        element = here.find_elements_by_xpath(
            "//div[@data-review-id] | //div[@data-photo-bucket-id]"
        )
        if not element:
            return

        element = element[-1]
        while True:
            # Scroll
            element.location_once_scrolled_into_view
            # Be sure to load the page
            sleep(3)
            # Find last <div> of the section
            nextElement = here.find_elements_by_xpath(
                "//div[@data-review-id] | //div[@data-photo-bucket-id]"
            )[-1]
            # Do it again if not at the end, else break the loop
            if nextElement == element:
                return
            element = nextElement

    def _selenium_scrap(self):

        # Open URL with Selenium
        self.driver.get(self.url)

        # Automate accepting cookies
        cookies_button = self.driver.find_elements_by_xpath(
            "//form[@action='https://consent.google.com/s']//button"
        )[0]
        cookies_button.click()

        # Be sure to load the page
        sleep(self.DELAY)

        # Open contributions panel
        contributions = self.driver.find_elements_by_xpath(
            "//span[@jsaction='pane.profile-stats.showStats;keydown:pane.profile-stats.showStats']"
        )[0]
        contributions.click()

        # Be sure to load the page
        sleep(self.DELAY)

        # Get informations from the contribution panel
        contributions_content = self.driver.find_elements_by_xpath(
            "//div[@id='modal-dialog']//h1/../../div"
        )
        contributions_header = contributions_content[0]
        contributions_points = contributions_content[2]
        contributions_stats = contributions_content[3].text.split("\n")[1::2]

        # Scrap 'Level' and 'Points' if target is a 'Local Guide'
        if contributions_points.text:
            self.local_guide = {}
            self.local_guide["level"] = int(contributions_header.text.split()[-1])
            self.local_guide["points"] = int(
                contributions_points.text.replace("\u202f", "").split("\n")[0]
            )

        # Add all the differents contributions statistics to a list
        self.contributions = {}
        self.contributions["reviews"] = int(contributions_stats[0])
        self.contributions["ratings"] = int(contributions_stats[1])
        self.contributions["photos"] = int(contributions_stats[2])
        self.contributions["videos"] = int(contributions_stats[3])
        self.contributions["answers"] = int(contributions_stats[4])
        self.contributions["edits"] = int(contributions_stats[5])
        self.contributions["placesAdded"] = int(contributions_stats[6])
        self.contributions["roadsAdded"] = int(contributions_stats[7])
        self.contributions["factsChecked"] = int(contributions_stats[8])
        self.contributions["q&a"] = int(contributions_stats[9])
        self.contributions["publishedLists"] = int(contributions_stats[10])

        # Close contributions panel
        self.driver.find_elements_by_xpath(
            "//div[@id='modal-dialog']//button[@jsaction='modal.close']"
        )[0].click()

        # Be sure to load the page
        sleep(self.DELAY)

        # Checking if there are some ratings or reviews to scrap
        if self.contributions["reviews"] or self.contributions["ratings"]:

            # Click on the review's panel
            review_panel = self.driver.find_elements_by_xpath(
                "//div[@role='tablist']/button[1]"
            )[0]
            review_panel.click()

            # Be sure to load the page
            sleep(self.DELAY)

            # Scroll in the layout section to load all the reviews to scrap
            divs = self.driver.find_elements_by_xpath("//div")
            layout_section = [
                scrollbox_section
                for scrollbox_section in divs
                if "section-scrollbox" in scrollbox_section.get_attribute("class")
            ][0]
            self.selenium_scroll(layout_section)

            # Scrap each review
            self.reviews = []
            for mpReview in layout_section.find_elements_by_xpath(
                "//div[@role='button']/div[@data-review-id]"
            ):
                review = {}
                # Separate title from content
                title = mpReview.find_elements_by_xpath("div[@class]/div[@class]")[
                    0
                ].text.split("\n")
                content = mpReview.find_elements_by_xpath("div[@class]/div[@class]")[1]

                # Click on the 'Plus' button to load all the text
                plus_button = content.find_elements_by_xpath("//jsl/button")
                if plus_button:
                    plus_button[0].click()

                # From title
                review["place"] = title[0]
                if len(title) > 1:
                    review["address"] = title[1]

                # From content
                firstLine = content.find_elements_by_xpath("./div")[0]

                # Elements always in content
                review["stars"] = int(
                    firstLine.find_elements_by_xpath("./span[@class]")[0]
                    .get_attribute("aria-label")
                    .split("\xa0")[0]
                    .replace(" ", "")
                )
                review["when"] = firstLine.find_elements_by_xpath("./span[@class]")[
                    1
                ].text

                # Elements not there every time
                try:  # Comment of the target
                    nextLine = firstLine.find_elements_by_xpath("../div[@class]")[1]
                    if nextLine.text != "":
                        review["comment"] = nextLine.text
                except:
                    pass
                try:  # "Visited in..." or "Owner's Response"
                    nextLine = nextLine.find_elements_by_xpath("../div[@class]")[3]
                    # Case with "Like" & "Share" instead of "Visited in..."
                    if not nextLine.find_elements_by_xpath("./button"):
                        # Case with "Owner's response" instead of "Visited in..."
                        if "title" not in nextLine.find_elements_by_xpath("./span")[
                            0
                        ].get_attribute("class"):
                            review["visited"] = nextLine.text
                        else:
                            review["ownersResponse"] = nextLine.text
                except:
                    pass

                self.reviews.append(review)

        # Check if there are some media to scrap to
        if self.contributions["photos"] or self.contributions["videos"]:
            # Going back to photos panel
            medias_panel = self.driver.find_elements_by_xpath(
                "//div[@role='tablist']/button[2]"
            )[0]
            medias_panel.click()

            # Be sure to load the page
            sleep(self.DELAY)

            # Scroll in the layout section to load all the medias to scrap
            divs = self.driver.find_elements_by_xpath("//div")
            layout_section = [
                scrollbox_section
                for scrollbox_section in divs
                if "section-scrollbox" in scrollbox_section.get_attribute("class")
            ][0]
            self.selenium_scroll(layout_section)

            try:
                # Scrap the number of times the medias has been seen by people
                self.medias["views"] = int(
                    layout_section.find_elements_by_xpath("div")[0]
                    .text.replace("\u202f", "")
                    .split("\n")[0]
                    .split(" ")[1]
                )
                self.medias = {}
            except IndexError:
                # Medias are mentioned in contributions panel but none are scrapable
                return

            # Scrap each post with media
            self.medias["content"] = []
            for content in layout_section.find_elements_by_xpath(
                ".//div[@role='button']"
            ):
                media = {}
                media["medias"] = []

                # Get the place and the address of the post
                place_and_address = content.text.split("\n")

                # Add the place and the address
                media["place"] = place_and_address[0]
                try:  # When place is "Unknown place" but had medias posted on it
                    media["address"] = place_and_address[1]
                except:
                    pass

                # For each media in the post
                for med in content.find_elements_by_xpath(".//jsl"):

                    # If the media is picture, "play button" is not displayed
                    if (
                        med.find_elements_by_xpath("./div/div")[-1].get_attribute(
                            "style"
                        )
                        == "display: none;"
                    ):

                        img = None
                        while not img:  # Waiting the picture to be loaded
                            try:
                                img = med.find_elements_by_xpath(".//img")[
                                    0
                                ].get_attribute("src")
                            except:
                                pass

                        # Add its source to the array
                        media["medias"].append(img)

                    else:  # The media is a video

                        # Click on the thumbnail to load the video in a new iFrame
                        med.find_elements_by_xpath(".//img/..")[0].click()

                        # Be sure to load the iFrame
                        sleep(self.DELAY)

                        # Find the iFrame and switch to it
                        iframe = self.driver.find_elements_by_xpath(
                            "//iframe[@class='widget-scene-imagery-iframe']"
                        )[0]
                        self.driver.switch_to.frame(iframe)

                        vid = None
                        while not vid:  # Waiting the video to be loaded
                            try:
                                vid = self.driver.find_elements_by_xpath("//video")[
                                    0
                                ].get_attribute("src")
                            except:
                                pass

                        # Switch back to the default DOM
                        self.driver.switch_to.default_content()

                        # Add its source to the array
                        media["medias"].append(vid)

                self.medias["content"].append(media)

    @property
    def nb_contributions(self):
        return sum(self.contributions[what] for what in self.contributions)

    @property
    def nb_medias(self):
        if "content" not in self.medias:
            return 0
        return sum(len(c["medias"]) for c in self.medias["content"])

    @property
    def nb_reviews_ratings(self):
        return len(self.reviews)

    @property
    def nb_displayed_reviews_ratings(self):
        return self.contributions["reviews"] + self.contributions["ratings"]

    @property
    def nb_displayed_medias(self):
        return self.contributions["photos"] + self.contributions["videos"]
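
As a point of reference, the scraper can also be exercised on its own: the cheap requests/BeautifulSoup pass always runs, while the full Selenium pass (which opens a real browser) only runs when the Maps profile is public. A minimal sketch, with a made-up numeric account ID:

from lib.maps_scraper import MapsScraper

# A numeric Google account ID, e.g. as returned by GooglePeopleApi.get_data()
scraper = MapsScraper(google_ID="123456789012345678901", browser="firefox")

if scraper.exist and scraper.is_public:
    print(scraper.nb_contributions, "contributions")
    print(scraper.as_dict())  # url, local_guide, contributions, reviews, medias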

‎lib/seleniumWrapper.py

-30
This file was deleted.

‎lib/selenium_wrapper.py

+38 lines (new file)

from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from sys import platform


class SeleniumWrapper:
    def __init__(self, browser):
        self.browser = browser
        self.driver = None
        DRIVER_PATH = ""

        # Detect OS
        if platform.startswith("linux"):
            EXT = ""
        elif platform.startswith("win32"):
            EXT = ".exe"
        else:
            raise Exception(
                "The use of selenium is not supported for this OS. "
                'Only "linux" and "win32" are possible\n',
                "Scrapping only the name and the number of contributions "
                "from Google Maps public profile",
                sep="\n",
            )

        # Choose the good driver
        if self.browser == "chrome":
            options = ChromeOptions()
            DRIVER_PATH = f"./drivers/chromedriver{EXT}"
            self.driver = webdriver.Chrome(options=options, executable_path=DRIVER_PATH)

        elif self.browser == "firefox":
            options = FirefoxOptions()
            DRIVER_PATH = f"./drivers/geckodriver{EXT}"
            self.driver = webdriver.Firefox(
                options=options, executable_path=DRIVER_PATH
            )
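
The wrapper only picks the matching driver binary for the current platform and browser; MapsScraper consumes it as below. A sketch, assuming the chromedriver/geckodriver binary sits in ./drivers/ as the paths above expect:

from lib.selenium_wrapper import SeleniumWrapper

driver = SeleniumWrapper("firefox").driver  # or "chrome"
driver.get("https://www.google.com/maps")
driver.quit()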

‎lib/user.py

+107 lines (new file)

from lib.maps_scraper import MapsScraper
from lib.youtube_scraper import YoutubeScraper


class User:
    def __init__(
        self, browser, mail=None, user_type=None, google_ID=None, profile_pic=None
    ):
        self.mail = mail
        self.user_type = user_type
        self.google_ID = google_ID
        self.profile_pic = profile_pic
        self.username = self.mail.split("@")[0]

        self.data_maps = MapsScraper(
            google_ID=google_ID,
            browser=browser,
        )

        self.data_youtube = YoutubeScraper(self.username)

        self.print_informations()

    def print_informations(self):
        self._print_global_info()
        self._print_maps_info()
        self._print_youtube_info()

    def _print_global_info(self):
        print(
            f"\n{self.mail} : "
            f"{self.user_type if self.user_type else 'NOT A GOOGLE USER'}\n"
        )
        if self.name:
            print(f"\tName : {self.name}")
        if self.google_ID:
            print(f"\tGoogle ID : {self.google_ID}")
        if self.profile_pic:
            print(f"\tProfile picture : {self.profile_pic}")

    def _print_maps_info(self):
        if not self.data_maps.exist:
            return

        print(f"\n\tMaps Contributions & Reviews ({self.data_maps.url})")

        if not self.data_maps.is_public:
            print("\tProfile is private, can't scrap informations from it")
            return

        if self.data_maps.local_guide:
            print(
                f"\t\tLocal Guide level {self.data_maps.local_guide['level']} with "
                f"{self.data_maps.local_guide['points']} points"
            )

        if self.data_maps.contributions:
            print(
                f"\t\t{self.data_maps.nb_contributions} contributions including "
                f"{self.data_maps.nb_displayed_reviews_ratings} reviews & ratings and "
                f"{self.data_maps.nb_displayed_medias} medias"
            )
            print(
                "\t\t\t"
                + " " * len(str(self.data_maps.nb_contributions))
                + f"scrapped in fact {self.data_maps.nb_reviews_ratings} "
                f"reviews & ratings and {self.data_maps.nb_medias} medias"
            )

    def _print_youtube_info(self):
        if not self.data_youtube.found:
            print(f'\n\tYouTube : User "{self.username}" not found')
            return

        print(f'\n\tYouTube : User "{self.data_youtube.username}" found !')
        creation = self.data_youtube.creation
        creation_date = creation[: len(creation) - 6].replace("T", " ")
        print(
            f'\t\tChannel named "{self.data_youtube.channel}" '
            f"created {creation_date}"
        )
        print(f"\t\t{self.data_youtube.url}")
        print(
            f"\t\t{sum(video['views'] for video in self.data_youtube.videos)} "
            f"cumulative views on {len(self.data_youtube.videos)} "
            "last posted video(s)"
        )

    def as_dict(self):
        data = {"mail": self.mail}
        if self.name:
            data["name"] = self.name
        if self.user_type:
            data["user_type"] = self.user_type
        if self.google_ID:
            data["google_ID"] = self.google_ID
        if self.profile_pic:
            data["profile_pic"] = self.profile_pic
        if self.data_maps:
            data["maps"] = self.data_maps.as_dict()
        if self.data_youtube:
            data["youtube"] = self.data_youtube.as_dict()
        return data

    @property
    def name(self):
        return self.data_maps.name
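
A User can also be built directly from the fields GooglePeopleApi.get_data() returns; the console summary is printed as a side effect of the constructor. A rough sketch, every value below being a placeholder:

from lib.user import User

user = User(
    browser="firefox",
    mail="someone@example.com",
    user_type="GOOGLE USER",
    google_ID="123456789012345678901",
    profile_pic="https://lh3.googleusercontent.com/a/placeholder",
)
print(user.as_dict())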

‎lib/youtube.py

-54
This file was deleted.

‎lib/youtube_scraper.py

+88 lines (new file)

import requests

from bs4 import BeautifulSoup


class YoutubeScraper:
    def __init__(self, username):
        self.username = username
        self.url = f"https://www.youtube.com/feeds/videos.xml?user={self.username}"

        self.found = None

        self.channel = None
        self.creation = None
        self.videos = None
        self._scrap_data()

    def _scrap_data(self):
        # Making the request
        youtube_request = requests.get(self.url)

        if youtube_request.status_code == 404:  # Not found
            self.found = False

        elif youtube_request.status_code == 200:  # Found

            html = BeautifulSoup(youtube_request.text, "html.parser")

            # Get informations of the account
            self.channel = html.title.string
            self.url = html.title.find_next_sibling("link").get("href")
            self.creation = html.published.string

            # Get informations of each video
            videos = []
            for vid in html.find_all("entry"):
                video = {}

                video["title"] = vid.find("title").string
                video["link"] = vid.find("link").get("href")
                video["thumbnail"] = (
                    vid.find("media:group").find("media:thumbnail").get("url")
                )
                video["description"] = (
                    vid.find("media:group").find("media:description").string
                )
                video["published"] = vid.find("published").string
                video["updated"] = vid.find("updated").string
                video["views"] = int(
                    vid.find("media:group")
                    .find("media:community")
                    .find("media:statistics")
                    .get("views")
                )
                video["thumbUp"] = int(
                    vid.find("media:group")
                    .find("media:community")
                    .find("media:starrating")
                    .get("count")
                )

                # YouTube give a note based on a ratio of thumbs up and down ('star')
                if video["thumbUp"] != "0":
                    video["stars"] = float(
                        vid.find("media:group")
                        .find("media:community")
                        .find("media:starrating")
                        .get("average")
                    )

                videos.append(video)

            self.videos = videos

    def as_dict(self):
        data = {"username": self.username}

        if self.found:
            data["url"] = self.url
        if self.channel:
            data["channel"] = self.channel
        if self.url:
            data["url"] = self.url
        if self.creation:
            data["creation"] = self.creation
        if self.videos:
            data["videos"] = self.videos
        return data
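
The YouTube lookup is just a request to the public RSS feed for a legacy username (no API key involved), so it works standalone; note the username is only the local part of the target's address and may well belong to an unrelated channel. A short sketch with a placeholder username:

from lib.youtube_scraper import YoutubeScraper

yt = YoutubeScraper("someone")  # local part of someone@example.com

if yt.found:
    print(yt.channel, yt.creation)
    for video in yt.videos:
        print(video["title"], video["views"])
else:
    print("No public feed found for this username")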

‎mailfogle.py

+38 −194 lines

@@ -1,237 +1,81 @@
-import json
 import argparse
-from sys import exit
-from time import sleep
-from lib.maps import mapsData
-from lib.youtube import youtubeData
-import lib.googlePeopleAPI as gpa
-
-def printBanner():
-    for line in open("assets/banner.txt","r"):
-        print(line.replace("\n",""))
-
-def printInformations(datas: dict):
-    """Print the main informations."""
-
-    user_type = (
-        ['NOT A GOOGLE USER'] if 'userTypes' not in datas
-        else [ut.replace('_',' ') for ut in datas['userTypes']]
-    )
-    print(f"\n{datas['mail']} : {', '.join(user_type)}\n")
-
-    # GOOGLE USER
-    if "userTypes" in datas:
-        if "name" in datas: print(f"\tName : {datas['name']}")
-        print(
-            f"\tGoogle ID : {datas['googleID']}" +
-            f"\n\tProfile picture : {datas['profilePic']}"
-        )
-
-        # Maps profile not private
-        if "maps" in datas:
-            print(f"\n\tMaps Contributions & Reviews ({datas['maps']['url']})")
-
-            if "localGuide" in datas['maps']:
-                level = datas['maps']['localGuide']['level']
-                points = datas['maps']['localGuide']['points']
-                print(f"\t\tLocal Guide level {level} with {points} points")
-
-            if isinstance(datas['maps']['contributions'],dict):
-                nbContrib = sum(
-                    datas['maps']['contributions'][what]
-                    for what in datas['maps']['contributions']
-                )
-                reviews_ratings = (
-                    datas['maps']['contributions']['reviews'] +
-                    datas['maps']['contributions']['ratings']
-                )
-                medias = (
-                    datas['maps']['contributions']['photos'] +
-                    datas['maps']['contributions']['videos']
-                )
-                print(
-                    f"\t\t{nbContrib} contributions including " +
-                    f"{reviews_ratings} reviews & ratings and {medias} medias"
-                )
-
-                count = 0
-                if datas['maps']['contributions']['photos'] or datas['maps']['contributions']['videos']:
-                    count = sum(
-                        len(c['medias'])
-                        for c in datas['maps']['medias']['content']
-                    )
-                reviews_ratings = (
-                    len(datas['maps']['reviews'])
-                    if 'reviews' in datas['maps'] else 0
-                )
-                print(
-                    "\t\t\t" + " "*len(str(nbContrib)) +
-                    f"scrapped in fact {reviews_ratings} reviews & ratings " +
-                    f"and {count} medias"
-                )
-
-            else:
-                print(
-                    f"\t\t{datas['maps']['contributions']} contributions" +
-                    "/!\\ This data is sometimes wrong. " +
-                    "Configure Selenium to scrap more accurate informations /!\\"
-                )
-
-        else:
-            print(
-                "\n\tGoogle maps profile is private, " +
-                "can\'t scrap informations from it"
-            )
-
-    # YouTube informations
-    if "youtube" in datas:
-        print(
-            f"\tYouTube : User \"{datas['youtube']['username']}\" found " +
-            "/!\\ Maybe not the one you're looking for /!\\"
-        )
-        creation = datas['youtube']['creation']
-        creation_date = creation[:len(creation)-6].replace('T',' ')
-        print(
-            f"\t\tChannel \"{datas['youtube']['channel']}\" created {creation_date}"
-        )
-        print(f"\t\t{datas['youtube']['url']}")
-        print(
-            f"\t\t{sum(video['views'] for video in datas['youtube']['videos'])} " +
-            f"cumulative views on {len(datas['youtube']['videos'])} " +
-            "last posted video(s) found"
-        )
-
-def main(mails,output,browser):
-
-    apiFlag = False
-    try:
-        gpa.connect()
-        apiFlag = True
-        print("Connected to Google people API")
-    except:
-        print("Cannot connect to Google people API")
-        print("Retry after deleting \"token.json\"")
-
-    datas = []
-
-    if apiFlag:
-
-        gpa.importContacts(mails)
-        while True:
-
-            connections = gpa.downloadContacts()
-            connections = list(filter(
-                lambda contact : "emailAddresses" in contact.keys()
-                and contact['emailAddresses'][0]['value'] in mails,
-                connections,
-            ))
-
-            for person in connections:
-                data = {}
-                mail = person['emailAddresses'][0]['value']
-
-                if mail in mails:
-                    data['mail'] = mail
-                    if len(person['metadata']['sources']) > 1:
-                        sources = person['metadata']['sources'][1]
-                        data['userTypes'] = sources['profileMetadata']['userTypes']
-                        data['googleID'] = sources['id']
-                        data['profilePic'] = person['photos'][0]['url']
-
-                        mpDatas = mapsData(
-                            url=(
-                                "https://www.google.com/maps/contrib/" +
-                                data['googleID']
-                            ),
-                            browser=browser,
-                        )
-                        if mpDatas: # If profile is public
-                            data['maps'] = mpDatas
-                            data['name'] = data['maps']['name']
-                            data['maps'].pop("name")
-
-                    ytDatas = youtubeData(mail.split("@")[0])
-                    if ytDatas: data['youtube'] = ytDatas
+import json
 
-                    printInformations(data)
+from lib.digger import Digger
 
-                    gpa.deleteContact(person['resourceName'])
-                    mails.pop(mails.index(mail))
 
-                    datas.append(data)
+def main(mails, output, browser):
 
-            if len(mails) == 0: break
-            sleep(2)
+    data = Digger(mails, browser)
 
-    else:
-        for mail in mails:
-            ytDatas = youtubeData(mail.split("@")[0])
-            data = {"mail" : mail}
-            if ytDatas :
-                data["youtube"] = ytDatas
-            printInformations(data)
-            datas.append(ytDatas)
+    with open((f"./{output}.json"), "w") as f:
+        json.dump(data.as_dict(), f, indent=2, ensure_ascii=False)
 
-    with open((f"./{output}.json"),"w") as f:
-        json.dump(datas,f, indent=2)
 
 if __name__ == "__main__":
-
     parser = argparse.ArgumentParser(
-        description="Explore and scrap user\'s public data from Google account"
+        description="Explore and scrap user's public data from Google account",
    )
     parser.add_argument(
         "-e",
-        # metavar="EMAIL",
+        "--email",
         dest="email",
         type=str,
         nargs="?",
         default=None,
-        help="target\'s mail"
+        help="target's mail",
     )
     parser.add_argument(
         "-f",
+        "--file",
         dest="file",
         type=str,
         nargs="?",
         default=None,
-        help="path to a file listing the email addresses of the targets"
+        help="path to a file listing the email addresses of the targets",
     )
     parser.add_argument(
         "-o",
+        "--output",
         dest="output",
         type=str,
         nargs="?",
+        required=False,
         default="output",
-        help="choose output name (default is \"output\")",
+        help="choose output name (default is 'output')",
     )
     parser.add_argument(
         "-b",
+        "--browser",
         dest="browser",
-        choices=["firefox","chrome"],
+        type=str.lower,
+        choices=["firefox", "chrome"],
+        required=False,
         default="firefox",
-        help="select browser \"chrome\" or \"firefox\" (default is \"firefox\")",
+        help='select browser "chrome" or "firefox" (default is "firefox")',
+    )
+    parser.add_argument(
+        "--no-banner",
+        dest="nobanner",
+        required=False,
+        default=False,
+        action="store_true",
+        help="doesn't display banner",
     )
     args = parser.parse_args()
 
-    printBanner()
+    if not args.nobanner:
+        print(open("assets/banner.txt", "r").read())
 
-    mails = []
-
-    if args.email: mails.append(args.email)
-    if args.file: mails.extend(open(args.file).read().splitlines())
+    if not (args.email or args.file):
+        parser.error("Please specify email(s) to dig")
 
-    if not mails:
-        exit(
-            "Please specify target\'s mail\n" +
-            "mailfogle.py [-h] for more informations"
-        )
+    mails = []
+    if args.email:
+        mails.append(args.email)
+    if args.file:
+        mails.extend(open(args.file).read().splitlines())
 
-    if args.browser.lower() not in ['firefox','chrome']:
-        exit(
-            "Please choose a browser between \"Firefox\" and \"Chrome\"\n" +
-            "mailfogle.py [-h] for more informations"
-        )
-    else: browser = args.browser.lower()
+    browser = args.browser
 
-    main(mails=mails,output=args.output,browser=browser)
+    main(mails=mails, output=args.output, browser=browser)
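
With the reworked CLI, a typical run would look something like the lines below (the address and file name are placeholders; -b defaults to firefox and -o to "output"):

python mailfogle.py -e someone@example.com
python mailfogle.py -f targets.txt -o results -b chrome --no-banner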
