Ceci est une expérimentation, et elle n'est pas encore terminée.
TODO ajouter warning etc.
Note : je n'avais jamais utilisé selenium ou ce genre de module pour contrôler un navigateur en mode "headless". Ce n'est pas très compliqué, et j'ai trouvé que ce petit tutoriel est bien fait : https://pythonbasics.org/selenium-firefox-headless/ Cette documentation est bien complète, pour le module Python de selenium.
from selenium import webdriver
# Smoke test: load one page in a headless Firefox and show the start of its HTML.
URL = "https://GitHub.com/Naereen"
print(f"Downloading '{URL}'...")

# Stays None if the driver fails to start, so the cleanup below knows
# whether there is anything to shut down.
browser = None
try:
    firefoxOptions = webdriver.FirefoxOptions()
    # Pass the flag directly instead of setting `headless = True`: the
    # argument form is accepted by both Selenium 3 and Selenium 4.
    firefoxOptions.add_argument("-headless")
    browser = webdriver.Firefox(options=firefoxOptions)
    browser.get(URL)
    print(browser.page_source[:500])
finally:
    if browser is not None:
        try:
            # quit() ends the whole session (window + driver process);
            # close() would only close the current window.
            browser.quit()
        except Exception as error:
            # Best-effort cleanup: report the problem but do not mask an
            # exception raised by the code above.
            print(f"Warning: could not shut down the browser cleanly: {error!r}")
Downloading 'https://GitHub.com/Naereen'... <html lang="en"><head> <meta charset="utf-8"> <link rel="dns-prefetch" href="https://github.githubassets.com"> <link rel="dns-prefetch" href="https://avatars0.githubusercontent.com"> <link rel="dns-prefetch" href="https://avatars1.githubusercontent.com"> <link rel="dns-prefetch" href="https://avatars2.githubusercontent.com"> <link rel="dns-prefetch" href="https://avatars3.githubusercontent.com"> <link rel="dns-prefetch" href="https://github-cloud.s3.amazonaws.com"> <link rel="d
J'ai bien réussi à installer et utiliser Selenium. Ça marche bien !
J'aurai besoin de ces modules-là :
import time
from datetime import datetime
import urllib.request
import subprocess
from selenium import webdriver
Et je vais avoir besoin de cette fonction-là, qui vient de https://stackoverflow.com/a/47425305/, pour télécharger l'attestation générée par le site. Celle-ci est donnée sous la forme d'une adresse « blob », par exemple : blob:https://media.interieur.gouv.fr/ca78c998-724b-4540-8152-2eb269b06eab
import base64
def get_file_content_chrome(browser, uri):
    """Use selenium [browser] to download blob [uri].

    The download is performed by JavaScript executed inside the page: an
    ``XMLHttpRequest`` fetches *uri* as an array buffer, the script encodes
    the bytes as base64 text, and that text is decoded back to bytes here.

    - Source https://stackoverflow.com/a/47425305/

    Args:
        browser: Selenium driver object; only its ``execute_async_script``
            method is used.
        uri: Address to fetch (for instance a ``blob:`` URI), handed to the
            script as its first argument.

    Returns:
        bytes: The content of the fetched resource.

    Raises:
        Exception: If the script answers with an integer, which is what its
            ``onerror`` handler sends back (the XHR status).
    """
    result = browser.execute_async_script("""
        var uri = arguments[0];
        var callback = arguments[1];
        var toBase64 = function(buffer){for(var r,n=new Uint8Array(buffer),t=n.length,a=new Uint8Array(4*Math.ceil(t/3)),i=new Uint8Array(64),o=0,c=0;64>c;++c)i[c]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".charCodeAt(c);for(c=0;t-t%3>c;c+=3,o+=4)r=n[c]<<16|n[c+1]<<8|n[c+2],a[o]=i[r>>18],a[o+1]=i[r>>12&63],a[o+2]=i[r>>6&63],a[o+3]=i[63&r];return t%3===1?(r=n[t-1],a[o]=i[r>>2],a[o+1]=i[r<<4&63],a[o+2]=61,a[o+3]=61):t%3===2&&(r=(n[t-2]<<8)+n[t-1],a[o]=i[r>>10],a[o+1]=i[r>>4&63],a[o+2]=i[r<<2&63],a[o+3]=61),new TextDecoder("ascii").decode(a)};
        var xhr = new XMLHttpRequest();
        xhr.responseType = 'arraybuffer';
        xhr.onload = function(){ callback(toBase64(xhr.response)) };
        xhr.onerror = function(){ callback(xhr.status) };
        xhr.open('GET', uri);
        xhr.send();
        """, uri)
    # On success the script calls back with a base64 string; on failure it
    # calls back with the numeric XHR status instead.
    if isinstance(result, int):
        raise Exception("Request failed with status %s" % result)
    return base64.b64decode(result)
# bytes = get_file_content_chrome(browser, "blob:https://developer.mozilla.org/7f9557f4-d8c8-4353-9752-5a49e85058f5")
Et maintenant je peux écrire une fonction qui génère une attestation et la sauvegarde, avec les détails (nom, adresse etc) donnés en argument.
Il suffira ensuite de lancer cette fonction toutes les 55 minutes et de m'envoyer un mail.
def download_attestation(details):
    """Fill the form on https://media.interieur.gouv.fr/deplacement-covid-19/ with details, and save the PDF attestation.

    Args:
        details: Dictionary mapping a form field suffix to the text typed
            into the ``#field-<suffix>`` input, e.g. ``firstname``,
            ``lastname``, ``birthday``, ``placeofbirth``, ``address``,
            ``city``, ``zipcode``.  ``datesortie`` and ``heuresortie`` are
            optional and default to the current date and time.  The
            dictionary is not modified.

    Returns:
        The name of the saved PDF file (written to the current working
        directory), or ``None`` if no file was saved.

    Raises:
        AssertionError: If the loaded page does not start with the expected
            HTML, i.e. the form page is probably not the one this code
            was written for.
    """
    # Local imports keep this function self-contained.
    import os
    from selenium.webdriver.common.by import By

    # Work on a copy: filling in the default date/time on the caller's dict
    # would make a later call with the same dict reuse this call's stale time.
    details = dict(details)

    download_name = None
    # Stays None if the driver fails to start, so cleanup can tell.
    browser = None
    try:
        firefox_options = webdriver.FirefoxOptions()
        # The argument form is accepted by both Selenium 3 and Selenium 4.
        firefox_options.add_argument("-headless")
        browser = webdriver.Firefox(options=firefox_options)

        url = "https://media.interieur.gouv.fr/deplacement-covid-19/"
        browser.get(url)

        # keep this check, to be sure that the downloaded webpage was the correct one, update if needed!
        # Raised explicitly rather than with `assert`, so that the check is
        # still performed when Python runs with -O.
        page_source_100 = '<html class="fontawesome-i2svg-active fontawesome-i2svg-complete" lang="fr"><head><meta charset="UTF'
        if browser.page_source[:100] != page_source_100:
            raise AssertionError(f"Unexpected page content at {url}: the form may have changed.")

        # automatically add current date/time if not present
        now = datetime.now()
        details.setdefault('datesortie', f"{now:%Y-%m-%d}")
        details.setdefault('heuresortie', f"{now:%H:%M}")

        # fill #field-XXX with YYY, read from the input dictionary
        for fieldname, value in details.items():
            # Only the length of each value is logged, never the value itself.
            hidden_value = '*' * len(value)
            print(f"Filling the form '{fieldname}' with value '{hidden_value}'...")
            input_field = browser.find_element(By.ID, f"field-{fieldname}")
            input_field.clear()
            input_field.send_keys(value)

        # Read every field back to detect an input that was not filled as asked.
        # NOTE(review): a mismatch is only reported, the PDF is still generated;
        # confirm whether the function should stop here instead.
        for fieldname, value in details.items():
            print(f"Checking value of the form '{fieldname}'...")
            input_field = browser.find_element(By.ID, f"field-{fieldname}")
            its_new_value = input_field.get_attribute("value")
            if its_new_value != value:
                print(f"Error: the form '{fieldname}' has value '{its_new_value}' != '{value}'.")

        # click on '#checkbox-achats'
        print("Clicking on '#checkbox-achats'...")
        browser.find_element(By.ID, "checkbox-achats").click()

        # click on '#generate-btn'
        print("Clicking on 'generate-btn'...")
        # TODO how to configure the path of the file to save?
        browser.find_element(By.ID, "generate-btn").click()

        # now wait 5 seconds (probably not mandatory...)
        print("New sleeping 5 seconds...")
        time.sleep(5)

        # The page is expected to contain a new link of the form
        # <a href="blob:https://media.interieur.gouv.fr/..." download="attestation-YYYY-MM-DD_HH-MM.pdf"></a>
        for link in browser.find_elements(By.CSS_SELECTOR, "a"):
            try:
                href = link.get_attribute("href")
                # The file name is dictated by the page: keep only its last
                # path component so it cannot point outside the current folder.
                download = os.path.basename(link.get_attribute("download") or "")
                if not download:
                    continue
                print(f"Found a new <a href='...'> link! with href = {href}")
                print(f" and it has a download = {download}")
                print("Downloading the file and save it!")
                # NOTE: urllib.request.urlretrieve(href, ...) was tried and
                # did not work, hence the in-browser download.
                bytes_download = get_file_content_chrome(browser, href)
                with open(download, "wb") as download_file:
                    download_file.write(bytes_download)
                # Only report the file once it has really been written.
                download_name = download
                print(f"The PDF file {download_name} is now saved!")
            except Exception as error:
                # One bad link must not stop the scan, but say what happened.
                print(f"Warning: could not download the file behind this link: {error!r}")
    finally:
        # let's close the browser and finish
        if browser is not None:
            try:
                # quit() ends the whole session; close() only closes the window.
                browser.quit()
            except Exception as error:
                print(f"Warning: could not shut down the browser cleanly: {error!r}")
    return download_name
TODO?
Maintenant je peux envoyer les attestations téléchargées "à la main" (mais ça marche parce que IPython/Jupyter c'est magique !).
Je vais les mettre dans un dossier (privé, caché et protégé par mot de passe), sur mon site web, avec un sous-dossier par jour.
def send_attestations():
    """Copy today's attestation PDFs to the dated remote folder (IPython/Jupyter only).

    The body relies on IPython ``!`` shell escapes, so this function is not
    valid in a plain Python interpreter.
    """
    # NOTE(review): `now` and `today` are computed but never used below;
    # the shell commands call `date` themselves.
    now = datetime.now()
    today = f"{now:%Y-%m-%d}"
    # List the PDF files of the current directory.
    !ls -larth *pdf
    # Echo the copy command first, so its expanded arguments show up in the output.
    !echo CP attestation-$(date '+%Y-%m-%d')_*.pdf ${Szam}attestations/$(date '+%Y-%m-%d')/
    # NOTE(review): `CP` and `$Szam` are not defined in this file; presumably
    # a copy command and a remote path prefix from the author's shell
    # environment - confirm before reuse.
    !CP attestation-$(date '+%Y-%m-%d')_*.pdf ${Szam}attestations/$(date '+%Y-%m-%d')/
En gros, ça fait juste ça :
!ls -larth *pdf
ls: impossible d'accéder à '*pdf': Aucun fichier ou dossier de ce type
!echo CP attestation-$(date '+%Y-%m-%d')_*.pdf ${Szam}attestations/$(date '+%Y-%m-%d')/
CP attestation-2020-10-31_*.pdf besson@zamok.crans.org:~/www/attestations/2020-10-31/
# TODO read from a file, like a .ini config file
# Example values for the form; each key is the suffix of a "field-<key>" input.
# 'datesortie' (YYYY-MM-DD) and 'heuresortie' (HH:MM) may be given as well;
# download_attestation() fills them with the current date/time when absent.
details = dict(
    firstname="Camille",
    lastname="Dupont",
    birthday="01/01/1970",
    placeofbirth="Paris",
    address="999 avenue de France",
    city="Paris",
    zipcode="75001",
)
%%time
# Generate one attestation from the sample values; a falsy result means that
# no PDF was saved, in which case nothing is announced.
download_name = download_attestation(details)
if download_name:
    print(
        f"SUCCESS: the PDF attestation was saved to '{download_name}'!"
    )
Filling the form 'firstname' with value '*******'... Filling the form 'lastname' with value '******'... Filling the form 'birthday' with value '**********'... Filling the form 'placeofbirth' with value '*****'... Filling the form 'address' with value '********************'... Filling the form 'city' with value '*****'... Filling the form 'zipcode' with value '*****'... Filling the form 'datesortie' with value '**********'... Filling the form 'heuresortie' with value '*****'... Checking value of the form 'firstname'... Checking value of the form 'lastname'... Checking value of the form 'birthday'... Checking value of the form 'placeofbirth'... Checking value of the form 'address'... Checking value of the form 'city'... Checking value of the form 'zipcode'... Checking value of the form 'datesortie'... Checking value of the form 'heuresortie'... Clicking on '#checkbox-achats'... Clicking on 'generate-btn'... New sleeping 5 seconds... Found a new <a href='...'> link! with href = blob:https://media.interieur.gouv.fr/2451a67f-5957-4ede-8fd2-9b8ef9d0f66c and it has a download = attestation-2020-10-31_14-08.pdf Downloading the file and save it! The PDF file attestation-2020-10-31_14-08.pdf is now saved! SUCCESS: the PDF attestation was saved to 'attestation-2020-10-31_14-08.pdf'! CPU times: user 95.5 ms, sys: 11.9 ms, total: 107 ms Wall time: 8.66 s
!ls -larth *.pdf
-rw-r--r-- 1 lilian lilian 42K oct. 31 14:08 exemple-attestation.pdf
On peut afficher le document PDF ainsi produit à l'intérieur du notebook Jupyter (merci à cette réponse StackOverflow) :
from IPython.display import IFrame
# Last expression of the cell: the notebook renders the returned IFrame,
# showing the PDF (path relative to the notebook) in an 800x500 frame.
IFrame("exemple-attestation.pdf", width=800, height=500)
Youpi j'ai réussi!
Pour ne pas montrer mes données personnelles dans ce notebook (public), je les ai écrites dans un petit fichier JSON, lu avec le module json de Python.
import json
!file details_lilian.json
!ls -larth details_lilian.json
details_lilian.json: ASCII text, with no line terminators -rw-r--r-- 1 lilian lilian 182 oct. 31 13:59 details_lilian.json
# Read the personal details from a JSON file kept outside of the notebook.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding (accented names would break otherwise).
with open("details_lilian.json", "r", encoding="utf-8") as f:
    details_lilian = json.load(f)
# Last expression of the cell: the notebook displays the returned file name.
download_attestation(details_lilian)
Filling the form 'firstname' with value '******'... Filling the form 'lastname' with value '******'... Filling the form 'birthday' with value '**********'... Filling the form 'placeofbirth' with value '********'... Filling the form 'address' with value '*************************'... Filling the form 'city' with value '******'... Filling the form 'zipcode' with value '*****'... Filling the form 'datesortie' with value '**********'... Filling the form 'heuresortie' with value '*****'... Checking value of the form 'firstname'... Checking value of the form 'lastname'... Checking value of the form 'birthday'... Checking value of the form 'placeofbirth'... Checking value of the form 'address'... Checking value of the form 'city'... Checking value of the form 'zipcode'... Checking value of the form 'datesortie'... Checking value of the form 'heuresortie'... Clicking on '#checkbox-achats'... Clicking on 'generate-btn'... New sleeping 5 seconds... Found a new <a href='...'> link! with href = blob:https://media.interieur.gouv.fr/986c4c9d-9608-4020-a2da-05f488886ca3 and it has a download = attestation-2020-10-31_14-03.pdf Downloading the file and save it! The PDF file attestation-2020-10-31_14-03.pdf is now saved!
'attestation-2020-10-31_14-03.pdf'
Et on peut les envoyer :
send_attestations()
-rw-r--r-- 1 lilian lilian 42K oct. 31 13:56 attestation-2020-10-31_13-56.pdf -rw-r--r-- 1 lilian lilian 42K oct. 31 13:57 attestation-2020-10-31_13-57.pdf -rw-r--r-- 1 lilian lilian 42K oct. 31 14:01 attestation-2020-10-31_14-01.pdf CP attestation-2020-10-31_13-56.pdf attestation-2020-10-31_13-57.pdf attestation-2020-10-31_14-01.pdf besson@zamok.crans.org:~/www/attestations/2020-10-31/ rsync: receiving the argument(s) attestation-2020-10-31_13-56.pdf attestation-2020-10-31_13-57.pdf attestation-2020-10-31_14-01.pdf besson@zamok.crans.org:~/www/attestations/2020-10-31/ Host key fingerprint is SHA256:EJib/9V3VjjkUX6w8bk2zt8BKxmG0JK6D+PDEyLtNb0 +---[ECDSA 256]---+ | o. o..| | o .o o*.| | o.+ . o.++| | o ..o . + +| | . o. S. + . =.| | . o +o. o + B +| | o ++o... o o * | | ..++E. . +| | .o. o| +----[SHA256]-----+ sending incremental file list created directory /home/besson/www/attestations/2020-10-31 attestation-2020-10-31_13-56.pdf 42.89K 100% 9.65MB/s 0:00:00 (xfr#1, to-chk=2/3) attestation-2020-10-31_13-57.pdf 42.87K 100% 6.81MB/s 0:00:00 (xfr#2, to-chk=1/3) attestation-2020-10-31_14-01.pdf 42.83K 100% 3.40MB/s 0:00:00 (xfr#3, to-chk=0/3) sent 116.28K bytes received 136 bytes 77.61K bytes/sec total size is 128.59K speedup is 1.10
Je ne ferai pas ça dans Python mais avec un simple script bash et un watch
!watch -help
Usage: watch [options] commande Options: -b, --beep émet un bip si la commande sort avec un code non nul -c, --color interprète les séquences de couleurs et de styles ANSI -d, --differences[=<permanent>] met en évidence les changements entre mises à jours -e, --errexit termine si la commande sort avec un code non nul -g, --chgexit termine quand la sortie de la commande change -n, --interval <sec> secondes de délai entre mises à jours -p, --precise essaie d'exécuter la commande à intervalles réguliers -t, --no-title masque l'en-tête -x, --exec passe la commande à exec au lieu de « sh -c » -h, --help affiche cette aide et termine -v, --version affiche les informations de version et sort Pour plus de détails, consultez watch(1).
C'était drôle. C'était une expérimentation. Mais ne vous servez pas de ça !