Skip to content

Commit

Permalink
Merge pull request #19 from azuki774/use-standalone-selenium
Browse files (browse the repository at this point in the history)
Use standalone (SA) selenium-server
  • Loading branch information
azuki774 authored Dec 7, 2023
2 parents 1d3c55c + 4ad9f39 commit ba9a70c
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 61 deletions.
2 changes: 1 addition & 1 deletion build/Dockerfile-money-forward
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ghcr.io/azuki774/selenium-chrome:0.2.0
FROM python:3.11-slim-buster as runner
COPY requirements/ /tmp/
RUN pip install --upgrade pip && pip install -r /tmp/moneyforward_requirements.txt

Expand Down
33 changes: 21 additions & 12 deletions deployment/compose.yml
Original file line number Diff line number Diff line change
@@ -1,20 +1,29 @@
version: '3'
services:
fetcher-api:
image: bill-fetcher-api
container_name: bill-fetcher-api
env_file:
- api.env
selenium:
image: selenium/standalone-chrome:4.1.4-20220427
container_name: selenium
ports:
- "8080:9876"
- 4444:4444
- 7900:7900
volumes:
- /dev/shm:/dev/shm

# money-forward:
# image: bill-fetcher-money-forward
# container_name: bill-fetcher-money-forward
# fetcher-api:
# image: bill-fetcher-api
# container_name: bill-fetcher-api
# env_file:
# - money-forward.env
# volumes:
# - ./:/data/
# - api.env
# ports:
# - "8080:9876"

money-forward:
image: bill-fetcher-money-forward
container_name: bill-fetcher-money-forward
env_file:
- money-forward.env
volumes:
- ./:/data/

# remix:
# image: bill-fetcher-remix
Expand Down
1 change: 1 addition & 0 deletions deployment/money-forward.env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ [email protected]
pass=xxxxxx
urls="https://moneyforward.com/bs/xxxxx,https://moneyforward.com/accounts/yyyyy,https://moneyforward.com/cf" # 取得するページを , 区切りで記載
refresh_xpaths="xxxxxxxx,xxxxxxxx" # https://moneyforward.com に表示される金融機関等の[更新]ボタンのXPATHを , 区切りで記載
START_SLEEP=20
2 changes: 2 additions & 0 deletions requirements/moneyforward_requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
python-json-logger>=2.0.7
selenium==4.12.0
beautifulsoup4==4.12.2
34 changes: 20 additions & 14 deletions src/moneyforward/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,41 +11,42 @@
import logging
from pythonjsonlogger import jsonlogger

# Base URL of the Money Forward site.
ROOTPAGE_URL = "https://moneyforward.com"

# Module-level logger.  Both the logger and its single stream handler are set
# to DEBUG, so every record logged through ``lg`` reaches the handler.
lg = logging.getLogger(__name__)
lg.setLevel(logging.DEBUG)
h = logging.StreamHandler()
h.setLevel(logging.DEBUG)

# Records are rendered as JSON carrying the timestamp, level, logger name and
# message.  ``json_ensure_ascii=False`` is passed so that non-ASCII text
# (e.g. Japanese messages) is not escaped in the output.
# The formatter is built exactly once; assigning it twice with the same
# arguments only creates a throw-away object.
json_fmt = jsonlogger.JsonFormatter(
    fmt="%(asctime)s %(levelname)s %(name)s %(message)s", json_ensure_ascii=False
)
h.setFormatter(json_fmt)
lg.addHandler(h)


def get_driver():
    """Open a Chrome WebDriver session on the standalone Selenium server.

    No browser is started inside this process; the session is requested from
    the Selenium server at ``http://selenium:4444/wd/hub`` (the ``selenium``
    service declared in deployment/compose.yml, which publishes port 4444).

    Returns:
        The ``webdriver.Remote`` instance for the new session, created with
        default ``ChromeOptions``.  The caller is responsible for calling
        ``quit()`` on it.
    """
    # Only the remote session is created.  Starting a local
    # ``webdriver.Chrome`` through /usr/bin/chromedriver first and then
    # rebinding ``driver`` would leave that first browser running with no
    # reference left to quit it, and its options would never reach the
    # remote session anyway.
    driver = webdriver.Remote(
        command_executor="http://selenium:4444/wd/hub",
        options=webdriver.ChromeOptions(),
    )
    return driver


def main():
lg.info("fetcher start")
lg.info("wait sleep for starting server")
time.sleep(int(os.getenv("START_SLEEP", default="0")))
lg.info("Get driver")
driver = get_driver()
lg.info("Get driver ok")
driver.implicitly_wait(10)

# login
try:
html = money.login(driver)
except Exception as e:
lg.error("failed to login. maybe changing xpath: %s", e)
driver.quit()
sys.exit(1)
lg.info("login ok")

Expand All @@ -58,9 +59,10 @@ def main():
try:
money.press_from_xpath(driver, xpath)
lg.info("press update button: %s", xpath)
time.sleep(30) # 反映されるように30sec待っておく
time.sleep(30) # 反映されるように30sec待っておく
except Exception as e:
lg.warn('failed to press update button: %s', e)
lg.warn("failed to press update button: %s", e)
driver.quit()

# download HTML
for url in urls:
Expand All @@ -72,7 +74,11 @@ def main():
money.write_html(html, url + "_lastmonth")
except Exception as e:
lg.error("failed to get HTML: %s", e)
driver.quit()
sys.exit(1)

driver.quit()


if __name__ == "__main__":
main()
72 changes: 38 additions & 34 deletions src/moneyforward/money.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,17 @@
lg.setLevel(logging.DEBUG)
h = logging.StreamHandler()
h.setLevel(logging.DEBUG)
json_fmt = jsonlogger.JsonFormatter(fmt='%(asctime)s %(levelname)s %(name)s %(message)s', json_ensure_ascii=False)
json_fmt = jsonlogger.JsonFormatter(
fmt="%(asctime)s %(levelname)s %(name)s %(message)s", json_ensure_ascii=False
)
h.setFormatter(json_fmt)
lg.addHandler(h)

SAVE_DIR = "/data/"


def login(driver):
url = "https://id.moneyforward.com/sign_in/"
url = "https://moneyforward.com/cf" # for login page without account_selector
driver.get(url)

login_id = driver.find_element(
Expand All @@ -34,36 +36,36 @@ def login(driver):
)
login_id.send_keys(os.getenv("id"))

lg.info("input login")

email_button = driver.find_element(
by=By.XPATH,
value="/html/body/main/div/div/div[2]/div/section/div/form/div/button",
)
email_button.click()

password_form = driver.find_element(
by=By.XPATH,
value="/html/body/main/div/div/div[2]/div/section/div/form/div/div[2]/input",
)
password_form.send_keys(os.getenv("pass"))
lg.info("input email button")

login_button = driver.find_element(
by=By.XPATH,
value="/html/body/main/div/div/div[2]/div/section/div/form/div/button",
)
login_button.click()
try:
password_form = driver.find_element(
by=By.XPATH,
value="/html/body/main/div/div/div[2]/div/section/div/form/div/div[2]/input",
)
password_form.send_keys(os.getenv("pass"))
lg.info("input password")

# ---
# login money forward ME
url = "https://moneyforward.com/sign_in/"
driver.get(url)
login_button = driver.find_element(
by=By.XPATH,
value="/html/body/main/div/div/div[2]/div/section/div/form/div/button",
)
login_button.click()
lg.info("input login_button")

# choose account button
choose_button = driver.find_element(
by=By.XPATH,
value="/html/body/main/div/div/div[2]/div/section/div/form/div/button",
)
choose_button.click()
except Exception as e:
lg.info("maybe already login. skipped.")

rl = "https://moneyforward.com/"
driver.get(url)
html = driver.page_source.encode("utf-8")
return html

Expand Down Expand Up @@ -104,6 +106,7 @@ def move_page(driver, url):
driver.get(url)
return


def press_from_xpath(driver, xpath):
"""
指定したxpathのリンクを押す
Expand All @@ -116,6 +119,7 @@ def press_from_xpath(driver, xpath):
xpath_link.click()
return


def get_status(driver, xpaths):
"""
/html/body/div[1]/div[3]/div[1]/div[1]/div[2]/div[1]/div/section[3]/ul/li[3]/ul[2]/li[3]/a[2] <- key: 「更新」リンクのxpath
Expand All @@ -130,31 +134,31 @@ def get_status(driver, xpaths):

for xpath in xpaths:
base_xpath_list = xpath.split("/")[0:-3]
base_xpath = "/".join(base_xpath_list) # /html/body/div[1]/div[3]/div[1]/div[1]/div[2]/div[1]/div/section[3]/ul/li[3]
base_xpath = "/".join(
base_xpath_list
) # /html/body/div[1]/div[3]/div[1]/div[1]/div[2]/div[1]/div/section[3]/ul/li[3]
name_xpath = base_xpath + "/div/a[1]"
syncday_xpath = base_xpath + "/div/div"
sync_status_xpath = base_xpath + "/ul[2]/li[1]"

name = driver.find_element(
by=By.XPATH,
value=name_xpath
).get_attribute("textContent")
name = driver.find_element(by=By.XPATH, value=name_xpath).get_attribute(
"textContent"
)

syncday = driver.find_element(
by=By.XPATH,
value=syncday_xpath
).get_attribute("textContent")
syncday = driver.find_element(by=By.XPATH, value=syncday_xpath).get_attribute(
"textContent"
)

sync_status = driver.find_element(
by=By.XPATH,
value=sync_status_xpath
by=By.XPATH, value=sync_status_xpath
).get_attribute("textContent")

ret_f[name] = {"sync_day" : syncday, "sync_status": sync_status}
ret_f[name] = {"sync_day": syncday, "sync_status": sync_status}

ret_f_json = json.dumps(ret_f, ensure_ascii=False)
return ret_f_json


def write_html(html, url):
today = dt.date.today() # 出力:datetime.date(2020, 3, 22)
yyyymmdd = "{0:%Y%m%d}".format(today) # 20200322
Expand Down

0 comments on commit ba9a70c

Please sign in to comment.