Selenium #
Selenium adalah library otomasi browser yang memungkinkan kamu mengontrol Chrome, Firefox, Edge, dan browser lain secara programatik — mengisi form, mengklik tombol, mengambil data, dan memverifikasi tampilan persis seperti yang dilakukan pengguna nyata. Dua use case utamanya adalah end-to-end testing (memastikan alur pengguna bekerja dari browser ke database) dan web scraping (mengambil data dari halaman yang membutuhkan JavaScript untuk render). Artikel ini menggunakan Selenium 4 — API-nya berubah signifikan dari versi 3: metode find_element_by_* sudah dihapus, digantikan oleh find_element(By.*) yang lebih konsisten.
Instalasi #
pip install selenium webdriver-manager
webdriver-manager mengelola download dan path ChromeDriver/GeckoDriver secara otomatis — tidak perlu download manual atau konfigurasi PATH.
Setup WebDriver #
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.chrome.options import Options as ChromeOptions
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
# ANTI-PATTERN: old Selenium 3 style (executable_path is deprecated)
driver = webdriver.Chrome(executable_path="/path/to/chromedriver") # ✗
# CORRECT: Selenium 4 with webdriver-manager (driver binary is downloaded automatically)
def buat_chrome_driver(headless: bool = False) -> webdriver.Chrome:
    """Create a Chrome WebDriver with the options used throughout this tutorial.

    Args:
        headless: When True, start Chrome with ``--headless=new`` (no UI),
            which is the mode intended for CI/CD.

    Returns:
        A ``webdriver.Chrome`` instance whose implicit wait is set to 0.
    """
    options = ChromeOptions()
    if headless:
        options.add_argument("--headless=new")  # run without a UI (for CI/CD)
    # NOTE(review): the snippet lost its indentation during extraction; the
    # flags below are assumed to apply in both headed and headless mode -- confirm.
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--disable-gpu")
    # Hide the markers that reveal the browser is automation-controlled
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)
    service = ChromeService(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)
    driver.implicitly_wait(0)  # disable implicit wait -- use explicit waits only
    return driver
def buat_firefox_driver(headless: bool = False) -> webdriver.Firefox:
    """Create a Firefox WebDriver whose GeckoDriver is installed by webdriver-manager.

    Args:
        headless: When True, start Firefox with ``--headless``.

    Returns:
        A ``webdriver.Firefox`` instance whose implicit wait is set to 0.
    """
    options = webdriver.FirefoxOptions()
    if headless:
        options.add_argument("--headless")
    service = FirefoxService(GeckoDriverManager().install())
    driver = webdriver.Firefox(service=service, options=options)
    # Same policy as buat_chrome_driver: explicit waits only.
    driver.implicitly_wait(0)
    return driver
# Usage
driver = buat_chrome_driver(headless=False)
try:
    driver.get("https://example.com")
    print(driver.title)
finally:
    driver.quit()  # always close the driver, even when the body raises
Jangan gunakan implicitly_wait bersamaan dengan explicit wait. Keduanya berinteraksi dengan cara yang tidak terduga dan bisa menyebabkan wait yang lebih lama dari yang diharapkan. Pilih satu pendekatan — explicit wait (WebDriverWait) jauh lebih baik karena dikontrol per elemen.
Menemukan Elemen #
Selenium 4 menggunakan find_element(By.*) dan find_elements(By.*). Semua metode find_element_by_* dari Selenium 3 sudah dihapus.
from selenium.webdriver.common.by import By
driver.get("https://contoh.com/login")
# ANTI-PATTERN: old Selenium 3 methods (removed in Selenium 4)
driver.find_element_by_id("username") # ✗ -- AttributeError
driver.find_element_by_xpath("//input") # ✗ -- AttributeError
# CORRECT: Selenium 4
driver.find_element(By.ID, "username")
driver.find_element(By.NAME, "password")
driver.find_element(By.CLASS_NAME, "btn-submit")
driver.find_element(By.TAG_NAME, "h1")
driver.find_element(By.LINK_TEXT, "Lupa Password?")
driver.find_element(By.PARTIAL_LINK_TEXT, "Lupa")
driver.find_element(By.XPATH, "//button[@type='submit']")
driver.find_element(By.CSS_SELECTOR, "input[name='email']")
driver.find_element(By.CSS_SELECTOR, ".form-control.email-field")
# Fetch many elements at once
semua_link = driver.find_elements(By.TAG_NAME, "a")
produk_list = driver.find_elements(By.CSS_SELECTOR, ".produk-card")
for produk in produk_list:
    # Lookups on an element are scoped to that element's subtree
    nama = produk.find_element(By.CSS_SELECTOR, ".produk-nama").text
    harga = produk.find_element(By.CSS_SELECTOR, ".produk-harga").text
    print(f"{nama}: {harga}")
Strategi Pemilihan Locator #
Urutan preferensi locator (dari terbaik ke terburuk):
1. By.ID -- paling cepat dan stabil jika ada ID unik
2. By.CSS_SELECTOR -- cepat, ekspresif, lebih mudah dibaca dari XPath
3. By.XPATH -- powerful untuk navigasi kompleks, tapi lebih lambat
4. By.NAME -- untuk form input dengan atribut name
5. By.LINK_TEXT -- khusus untuk elemen <a>
6. By.CLASS_NAME -- hindari jika class generic (btn, form-control)
7. By.TAG_NAME -- hanya jika tag benar-benar unik di konteks itu
Hindari:
✗ XPath absolut: /html/body/div[2]/div[1]/span -- rapuh, berubah kalau HTML berubah
✗ Class yang generic: .container, .row, .col -- bisa cocok banyak elemen
Explicit Wait — Menunggu Elemen #
Halaman web modern menggunakan JavaScript yang merender konten secara async. Kamu harus menunggu elemen siap sebelum berinteraksi — jangan gunakan time.sleep() yang boros waktu.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
def tunggu_elemen(driver, by, locator, timeout: int = 10):
    """Wait until the located element is clickable and return it.

    Args:
        driver: The WebDriver to poll.
        by: Locator strategy (a ``By.*`` constant).
        locator: Locator value for the chosen strategy.
        timeout: Maximum number of seconds to wait.

    Returns:
        The result of ``EC.element_to_be_clickable`` once it is truthy.

    Raises:
        TimeoutException: If the condition is not met within ``timeout`` seconds.
    """
    return WebDriverWait(driver, timeout).until(
        EC.element_to_be_clickable((by, locator))
    )
def tunggu_teks(driver, by, locator, teks: str, timeout: int = 10):
    """Wait until the located element contains the given text.

    Args:
        driver: The WebDriver to poll.
        by: Locator strategy (a ``By.*`` constant).
        locator: Locator value for the chosen strategy.
        teks: Text that must be present in the element.
        timeout: Maximum number of seconds to wait.

    Returns:
        The truthy result of ``EC.text_to_be_present_in_element`` (not the
        element itself).

    Raises:
        TimeoutException: If the text does not appear within ``timeout`` seconds.
    """
    return WebDriverWait(driver, timeout).until(
        EC.text_to_be_present_in_element((by, locator), teks)
    )
# Commonly used expected conditions
wait = WebDriverWait(driver, 10)
# Wait for the element to be present in the DOM (not necessarily visible)
wait.until(EC.presence_of_element_located((By.ID, "hasil")))
# Wait for the element to be visible and clickable
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']")))
# Wait for the element to stop being visible (loading spinner gone)
wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, ".loading-spinner")))
# Wait for the URL to change
wait.until(EC.url_contains("/dashboard"))
wait.until(EC.url_matches(r"/dashboard/\d+"))
# Wait for the title to change
wait.until(EC.title_contains("Dashboard"))
# Handle a timeout gracefully
try:
    elemen = WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.ID, "popup-promo"))
    )
    elemen.find_element(By.CSS_SELECTOR, ".close-btn").click()
except (TimeoutException, NoSuchElementException):
    # The popup did not appear, or it has no close button: carry on.
    pass
# ANTI-PATTERN: time.sleep() is wasteful and unreliable
import time
time.sleep(3) # ✗ -- always waits 3 seconds even if the element is already there
elemen = driver.find_element(By.ID, "hasil")
# CORRECT: an efficient explicit wait
elemen = WebDriverWait(driver, 10).until( # ✓ -- returns as soon as the element appears
    EC.presence_of_element_located((By.ID, "hasil"))
)
Interaksi dengan Elemen #
from selenium.webdriver.common.keys import Keys
# Fill in and submit a form
email_field = driver.find_element(By.ID, "email")
password_field = driver.find_element(By.ID, "password")
email_field.clear()
# The address in the extracted article was replaced by an e-mail obfuscation
# placeholder; a valid example address is used instead.
email_field.send_keys("user@example.com")
password_field.clear()
password_field.send_keys("password123")
# Submit with Enter
password_field.send_keys(Keys.RETURN)
# Or click the submit button (an alternative to pressing Enter above)
submit_btn = driver.find_element(By.CSS_SELECTOR, "button[type='submit']")
submit_btn.click()
# Read element content
judul = driver.find_element(By.TAG_NAME, "h1").text
nilai = driver.find_element(By.ID, "total").get_attribute("value")
href = driver.find_element(By.CSS_SELECTOR, "a.btn-detail").get_attribute("href")
is_aktif = driver.find_element(By.ID, "checkbox-aktif").is_selected()
# Scroll to an element that is outside the viewport
elemen = driver.find_element(By.ID, "bagian-bawah")
driver.execute_script("arguments[0].scrollIntoView(true);", elemen)
# Click via JavaScript (useful when the element is covered by an overlay)
driver.execute_script("arguments[0].click();", elemen)
# Choose from a dropdown (SELECT)
from selenium.webdriver.support.ui import Select
dropdown = Select(driver.find_element(By.ID, "kategori"))
dropdown.select_by_visible_text("Elektronik")
dropdown.select_by_value("elektronik")
dropdown.select_by_index(2)
ActionChains — Interaksi Kompleks #
from selenium.webdriver.common.action_chains import ActionChains
actions = ActionChains(driver)
# Hover (mouse over)
menu_item = driver.find_element(By.CSS_SELECTOR, ".nav-item.dropdown")
actions.move_to_element(menu_item).perform()
# Click the submenu after hovering
submenu = WebDriverWait(driver, 5).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, ".dropdown-menu a:first-child"))
)
submenu.click()
# Drag and drop
sumber = driver.find_element(By.ID, "draggable")
tujuan = driver.find_element(By.ID, "droppable")
actions.drag_and_drop(sumber, tujuan).perform()
# Double click
elemen = driver.find_element(By.CSS_SELECTOR, ".item-editable")
actions.double_click(elemen).perform()
# Right click (context menu)
actions.context_click(elemen).perform()
# Keyboard shortcut
from selenium.webdriver.common.keys import Keys
# NOTE(review): `body` is located but not referenced by the key chord below.
body = driver.find_element(By.TAG_NAME, "body")
actions.key_down(Keys.CONTROL).send_keys("a").key_up(Keys.CONTROL).perform() # Ctrl+A
Page Object Model #
Page Object Model (POM) adalah pola arsitektur yang memisahkan logika interaksi halaman dari logika test. Setiap halaman web direpresentasikan sebagai kelas Python — perubahan UI hanya perlu diubah di satu tempat.
# pages/base_page.py
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
class BasePage:
    """Base class for page objects: wraps a driver and common explicit-wait helpers."""

    def __init__(self, driver, timeout: int = 10):
        """Store the driver and build the shared explicit wait.

        Args:
            driver: The WebDriver instance this page operates on.
            timeout: Seconds used for every explicit wait made through this page.
        """
        self.driver = driver
        self.timeout = timeout
        self.wait = WebDriverWait(driver, timeout)

    def buka(self, url: str):
        """Navigate the driver to ``url``."""
        self.driver.get(url)

    def temukan(self, by, locator):
        """Wait for the element to be present in the DOM and return it."""
        return self.wait.until(EC.presence_of_element_located((by, locator)))

    def klik(self, by, locator):
        """Wait for the element to be clickable, then click it."""
        self.wait.until(EC.element_to_be_clickable((by, locator))).click()

    def isi(self, by, locator, teks: str):
        """Clear the located input and type ``teks`` into it."""
        elemen = self.temukan(by, locator)
        elemen.clear()
        elemen.send_keys(teks)

    def ambil_teks(self, by, locator) -> str:
        """Return the text of the located element."""
        return self.temukan(by, locator).text

    def screenshot(self, nama_file: str):
        """Save a screenshot to ``screenshots/<nama_file>.png``."""
        import os

        # Create the target directory first so the save cannot fail just
        # because it is missing.
        os.makedirs("screenshots", exist_ok=True)
        self.driver.save_screenshot(f"screenshots/{nama_file}.png")

    def tunggu_url_berubah(self, url_fragment: str):
        """Wait until the current URL contains ``url_fragment``."""
        self.wait.until(EC.url_contains(url_fragment))
# pages/login_page.py
from selenium.webdriver.common.by import By
from .base_page import BasePage
class LoginPage(BasePage):
    """Page object for the login page."""

    # Locators are defined on the class, not in the tests
    URL = "https://myapp.com/login"
    EMAIL_INPUT = (By.ID, "email")
    PASSWORD_INPUT = (By.ID, "password")
    SUBMIT_BTN = (By.CSS_SELECTOR, "button[type='submit']")
    ERROR_MSG = (By.CSS_SELECTOR, ".alert-danger")
    REMEMBER_ME = (By.ID, "remember-me")

    def buka_halaman_login(self):
        """Open the login URL."""
        self.buka(self.URL)

    def login(self, email: str, password: str, ingat: bool = False):
        """Fill in the credentials and submit the form.

        Args:
            email: Value typed into the e-mail field.
            password: Value typed into the password field.
            ingat: When True, click the "remember me" control before submitting.
        """
        self.isi(*self.EMAIL_INPUT, email)
        self.isi(*self.PASSWORD_INPUT, password)
        if ingat:
            self.klik(*self.REMEMBER_ME)
        self.klik(*self.SUBMIT_BTN)

    def ambil_pesan_error(self) -> str:
        """Return the error message text, or "" if none appears before the wait times out."""
        from selenium.common.exceptions import TimeoutException

        try:
            return self.ambil_teks(*self.ERROR_MSG)
        except TimeoutException:
            # Only "no error element showed up" means "no message"; any other
            # failure (e.g. a dead session) should surface to the caller.
            return ""

    def adalah_halaman_login(self) -> bool:
        """Return True when the current URL contains "/login"."""
        return "/login" in self.driver.current_url
# pages/dashboard_page.py
from selenium.webdriver.common.by import By
from .base_page import BasePage
class DashboardPage(BasePage):
    """Page object for the dashboard shown after a successful login."""

    WELCOME_MSG = (By.CSS_SELECTOR, ".welcome-message")
    LOGOUT_BTN = (By.ID, "btn-logout")
    PRODUK_COUNT = (By.CSS_SELECTOR, ".stat-produk .count")

    def tunggu_dashboard_muncul(self):
        """Wait until the URL contains "/dashboard"."""
        self.tunggu_url_berubah("/dashboard")

    def ambil_pesan_selamat_datang(self) -> str:
        """Return the text of the welcome message element."""
        return self.ambil_teks(*self.WELCOME_MSG)

    def logout(self):
        """Click the logout button."""
        self.klik(*self.LOGOUT_BTN)

    def ambil_jumlah_produk(self) -> int:
        """Return the product counter parsed as an int.

        Raises:
            ValueError: If the counter text is not a plain integer.
        """
        return int(self.ambil_teks(*self.PRODUK_COUNT))
Integrasi dengan PyTest #
# tests/conftest.py
import pytest
import os
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
@pytest.fixture(scope="module")
def driver():
    """Headless Chrome driver, created once per test module and quit at teardown."""
    options = Options()
    options.add_argument("--headless=new")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--window-size=1920,1080")
    service = Service(ChromeDriverManager().install())
    browser = webdriver.Chrome(service=service, options=options)
    try:
        browser.implicitly_wait(0)  # explicit waits only
        yield browser
    finally:
        # Runs at teardown and also if setup fails after the browser started.
        browser.quit()
@pytest.fixture(autouse=True)
def screenshot_on_failure(driver, request):
    """Save a screenshot automatically when the test's call phase failed.

    Reads the ``rep_call`` attribute from the test item (set by the
    ``pytest_runtest_makereport`` hook); if it is absent nothing is saved.
    """
    import re

    yield
    laporan = getattr(request.node, "rep_call", None)
    if laporan is not None and laporan.failed:
        os.makedirs("screenshots", exist_ok=True)
        # Parametrized ids may contain characters that are unsafe in file
        # names (brackets, slashes, ...), so keep only a conservative set.
        nama = re.sub(r"[^\w.\-]+", "_", request.node.name)
        driver.save_screenshot(f"screenshots/FAIL_{nama}.png")
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_runtest_makereport(item, call):
    """Attach each phase's report to the test item so fixtures can detect failures.

    The report is stored as ``item.rep_<when>``, where ``<when>`` is the
    report's ``when`` attribute.
    """
    outcome = yield
    rep = outcome.get_result()
    setattr(item, f"rep_{rep.when}", rep)
# tests/test_login.py
import pytest
from pages.login_page import LoginPage
from pages.dashboard_page import DashboardPage
# Base URL of the application under test.
# NOTE(review): not referenced by the fixtures/tests visible in this snippet.
BASE_URL = "https://myapp.com"
@pytest.fixture
def login_page(driver):
    """Return a LoginPage that has already been navigated to the login URL."""
    page = LoginPage(driver)
    page.buka_halaman_login()
    return page
@pytest.fixture
def dashboard_page(driver):
    """Return a DashboardPage bound to the shared driver (no navigation is done)."""
    return DashboardPage(driver)
def test_login_berhasil(login_page, dashboard_page):
    """Valid credentials lead to the dashboard and its welcome message."""
    # The address in the extracted article was an obfuscation placeholder;
    # a valid example address is used instead.
    login_page.login("user@example.com", "password123")
    dashboard_page.tunggu_dashboard_muncul()
    assert "/dashboard" in login_page.driver.current_url
    pesan = dashboard_page.ambil_pesan_selamat_datang()
    assert "Selamat datang" in pesan
def test_login_password_salah(login_page):
    """A wrong password shows the error message and stays on the login page."""
    # The address in the extracted article was an obfuscation placeholder;
    # a valid example address is used instead.
    login_page.login("user@example.com", "passwordsalah")
    pesan_error = login_page.ambil_pesan_error()
    assert "Email atau password salah" in pesan_error
    assert login_page.adalah_halaman_login()
def test_login_email_kosong(login_page):
    """An empty e-mail either shows an error or keeps the user on the login page."""
    login_page.login("", "password123")
    pesan_error = login_page.ambil_pesan_error()
    assert pesan_error or login_page.adalah_halaman_login()
@pytest.mark.parametrize("email,password", [
    ("", "password"),
    ("bukan-email", "password"),
    # The address in the extracted article was an obfuscation placeholder;
    # this case is a well-formed e-mail with an empty password.
    ("user@example.com", ""),
])
def test_login_input_tidak_valid(login_page, email, password):
    """Invalid input combinations must leave the user on the login page."""
    login_page.login(email, password)
    assert login_page.adalah_halaman_login()
Web Scraping dengan Selenium #
Selenium untuk scraping digunakan saat konten dirender oleh JavaScript dan tidak bisa diambil dengan requests biasa.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json, time
def scrape_produk_ecommerce(
    url: str,
    maks_scroll: "int | None" = None,
    jeda_scroll: float = 1.5,
) -> list[dict]:
    """Scrape product cards from a JavaScript-rendered listing page.

    Opens ``url`` in headless Chrome, scrolls to the bottom repeatedly until
    the page height stops growing, then reads every ``.product-card``.

    Args:
        url: Page to scrape.
        maks_scroll: Optional upper bound on scroll iterations; ``None``
            keeps scrolling until the height stops changing.
        jeda_scroll: Seconds to pause after each scroll so lazy content can load.

    Returns:
        A list of dicts with keys ``nama``, ``harga``, ``rating`` and ``url``.
        Cards missing any of those elements are skipped.

    Raises:
        TimeoutException: If no product card appears within 15 seconds.
    """
    from selenium.common.exceptions import (
        NoSuchElementException,
        StaleElementReferenceException,
    )

    driver = buat_chrome_driver(headless=True)
    produk_list = []
    try:
        driver.get(url)
        # Wait for the first product to appear
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".product-card"))
        )
        # Scroll down to trigger lazy-loaded content
        tinggi_lama = 0
        jumlah_scroll = 0
        while maks_scroll is None or jumlah_scroll < maks_scroll:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(jeda_scroll)
            tinggi_baru = driver.execute_script("return document.body.scrollHeight")
            if tinggi_baru == tinggi_lama:
                break  # height stopped growing: nothing more was loaded
            tinggi_lama = tinggi_baru
            jumlah_scroll += 1
        # Collect the data of every product
        for card in driver.find_elements(By.CSS_SELECTOR, ".product-card"):
            try:
                produk_list.append({
                    "nama": card.find_element(By.CSS_SELECTOR, ".product-name").text.strip(),
                    "harga": card.find_element(By.CSS_SELECTOR, ".product-price").text.strip(),
                    "rating": card.find_element(By.CSS_SELECTOR, ".product-rating").get_attribute("data-score"),
                    "url": card.find_element(By.CSS_SELECTOR, "a").get_attribute("href"),
                })
            except (NoSuchElementException, StaleElementReferenceException):
                # Skip incomplete cards only; other errors should propagate.
                continue
    finally:
        driver.quit()
    return produk_list
# Save the result as JSON
hasil = scrape_produk_ecommerce("https://contoh-toko.com/produk")
with open("produk.json", "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps non-ASCII product names readable in the file
    json.dump(hasil, f, ensure_ascii=False, indent=2)
print(f"{len(hasil)} produk berhasil di-scrape.")
Ringkasan #
- Selenium 4 API baru — gunakan find_element(By.ID, "...") bukan find_element_by_id(); semua metode find_element_by_* sudah dihapus di Selenium 4.
- webdriver-manager — gunakan untuk auto-download dan manajemen ChromeDriver/GeckoDriver; tidak perlu download manual atau konfigurasi PATH.
- Explicit wait, bukan time.sleep() — gunakan WebDriverWait dengan expected_conditions yang tepat; lebih cepat dan lebih andal karena berhenti segera saat kondisi terpenuhi.
- Matikan implicitly_wait — set ke 0 dan gunakan hanya explicit wait; keduanya bercampur menyebabkan perilaku yang tidak terduga.
- CSS Selector lebih disukai dari XPath — lebih mudah dibaca dan umumnya lebih cepat; gunakan XPath hanya untuk navigasi DOM yang kompleks atau saat CSS tidak cukup.
- Page Object Model untuk test suite — pisahkan locator dan interaksi ke kelas page tersendiri; perubahan UI hanya perlu diupdate di satu tempat.
- driver.quit() wajib di finally — selalu tutup driver di blok finally atau gunakan fixture pytest agar browser tidak bocor meski test gagal.
- --headless=new — gunakan flag headless terbaru untuk Chrome di CI/CD; lebih stabil dari --headless lama.
- Screenshot saat test gagal — implementasikan hook pytest pytest_runtest_makereport untuk screenshot otomatis saat test gagal; sangat membantu debugging CI.
- Scroll untuk lazy-loaded content — gunakan execute_script("window.scrollTo...") untuk memuat konten yang baru muncul saat scroll, terutama saat scraping.
← Sebelumnya: PyTest