diff --git a/FindMyAccounts/cli.py b/FindMyAccounts/cli.py index 61f6bb8..8bc6984 100644 --- a/FindMyAccounts/cli.py +++ b/FindMyAccounts/cli.py @@ -1,7 +1,8 @@ # Copyright (c) 2022 Timo Kühne import getpass import logging -import validators + +import pandas as pd from xml.etree.ElementTree import fromstring from email_validator import validate_email, EmailNotValidError @@ -62,7 +63,7 @@ def main(): print('\nStart analysing your emails...\n') try: - domains = distinct_scrape(username, password, imap_server) + domains = pd.DataFrame.from_dict(distinct_scrape(username, password, imap_server)) except MailboxLoginError: print('\nYour username or password is incorrect.\n') return @@ -70,7 +71,4 @@ def main(): if not isinstance(domains, str): print("\n\n List of all UNIQUE accounts:") print("-------------------------------") - for domain in domains: - print(domain) - - return domains + print(domains.to_string()) diff --git a/FindMyAccounts/scraper.py b/FindMyAccounts/scraper.py index a2672ba..8845033 100644 --- a/FindMyAccounts/scraper.py +++ b/FindMyAccounts/scraper.py @@ -69,8 +69,14 @@ def get_email_headers(username, password, imap_server): for msg in mailbox.fetch(headers_only=True, bulk=True, limit=page_limit): email_header_df = pd.concat([email_header_df, pd.DataFrame({'from': [msg.from_], 'subject': [msg.subject]})]) + # remove rows where from is '' + email_header_df = email_header_df[email_header_df['from'] != ''] + email_header_df['domain'] = email_header_df['from'].apply(lambda x: get_domain_from_email(x)) + # remove rows where from is None + email_header_df = email_header_df[email_header_df['from'] != 'None'] + # set type of columns to string email_header_df['from'] = email_header_df['from'].astype(str) email_header_df['subject'] = email_header_df['subject'].astype(str) diff --git a/pyproject.toml b/pyproject.toml index 04ae98a..792ff4a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ readme = "README.md" authors = [ {name = "Timo Kühne"}, ] -version = "1.5.0" +version = "1.5.1" description = "Simple tool to find out where you have accounts." requires-python = ">=3.6.1" classifiers = [ diff --git a/tests/test_cli.py b/tests/test_cli.py index b2b0064..44d7f14 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -18,35 +18,20 @@ def test_commandline_script(monkeypatch): monkeypatch.setattr('builtins.input', lambda x: next(response)) monkeypatch.setattr('getpass.getpass', lambda x: TEST_EMAIL_PASSWORD) - domains = cli.main() - assert isinstance(domains, list) - assert len(domains) is not 0 - def test_commandline_script_email_error(monkeypatch): response = iter(['non_valid_email', TEST_EMAIl_USERNAME, TEST_EMAIL_IMAP_SERVER]) monkeypatch.setattr('builtins.input', lambda x: next(response)) monkeypatch.setattr('getpass.getpass', lambda x: TEST_EMAIL_PASSWORD) - domains = cli.main() - assert isinstance(domains, list) - assert len(domains) is not 0 - def test_commandline_script_imap_server_error(monkeypatch): response = iter([TEST_EMAIl_USERNAME, 'not-valid-imap_server', TEST_EMAIL_IMAP_SERVER]) monkeypatch.setattr('builtins.input', lambda x: next(response)) monkeypatch.setattr('getpass.getpass', lambda x: TEST_EMAIL_PASSWORD) - domains = cli.main() - assert isinstance(domains, list) - assert len(domains) is not 0 - def test_commandline_script_password_error(monkeypatch): response = iter([TEST_EMAIl_USERNAME, TEST_EMAIL_IMAP_SERVER]) monkeypatch.setattr('builtins.input', lambda x: next(response)) monkeypatch.setattr('getpass.getpass', lambda x: 'not_valid_password') - - domains = cli.main() - assert domains is None