From 985ee19b1310390f04f67d526cb84710f8b6b4a2 Mon Sep 17 00:00:00 2001 From: Jeremy Howard Date: Thu, 30 May 2024 14:08:50 +1000 Subject: [PATCH] fixes #562 --- fastcore/_modidx.py | 3 ++ fastcore/net.py | 79 +++++++++++++++++++++++++++++---------------- fastcore/xml.py | 3 +- nbs/03b_net.ipynb | 78 ++++++++++++++++++++++++++++++++++++++++++++ nbs/11_xml.ipynb | 9 +++--- 5 files changed, 139 insertions(+), 33 deletions(-) diff --git a/fastcore/_modidx.py b/fastcore/_modidx.py index d3404a00..7cfc2e97 100644 --- a/fastcore/_modidx.py +++ b/fastcore/_modidx.py @@ -377,8 +377,11 @@ 'fastcore.net.Request.summary': ('net.html#request.summary', 'fastcore/net.py'), 'fastcore.net._socket_det': ('net.html#_socket_det', 'fastcore/net.py'), 'fastcore.net.do_request': ('net.html#do_request', 'fastcore/net.py'), + 'fastcore.net.http_response': ('net.html#http_response', 'fastcore/net.py'), + 'fastcore.net.recv_once': ('net.html#recv_once', 'fastcore/net.py'), 'fastcore.net.start_client': ('net.html#start_client', 'fastcore/net.py'), 'fastcore.net.start_server': ('net.html#start_server', 'fastcore/net.py'), + 'fastcore.net.tobytes': ('net.html#tobytes', 'fastcore/net.py'), 'fastcore.net.urlcheck': ('net.html#urlcheck', 'fastcore/net.py'), 'fastcore.net.urlclean': ('net.html#urlclean', 'fastcore/net.py'), 'fastcore.net.urldest': ('net.html#urldest', 'fastcore/net.py'), diff --git a/fastcore/net.py b/fastcore/net.py index e6201157..36ee2641 100644 --- a/fastcore/net.py +++ b/fastcore/net.py @@ -3,8 +3,9 @@ # %% auto 0 __all__ = ['url_default_headers', 'ExceptionsHTTP', 'urlquote', 'urlwrap', 'HTTP4xxClientError', 'HTTP5xxServerError', 'urlopener', 'urlopen', 'urlread', 'urljson', 'urlcheck', 'urlclean', 'urlretrieve', 'urldest', 'urlsave', - 'urlvalid', 'urlrequest', 'urlsend', 'do_request', 'start_server', 'start_client', 'HTTP400BadRequestError', - 'HTTP401UnauthorizedError', 'HTTP402PaymentRequiredError', 'HTTP403ForbiddenError', 'HTTP404NotFoundError', + 'urlvalid', 'urlrequest', 'urlsend', 'do_request', 'start_server', 'start_client', 'tobytes', + 'http_response', 'recv_once', 'HTTP400BadRequestError', 'HTTP401UnauthorizedError', + 'HTTP402PaymentRequiredError', 'HTTP403ForbiddenError', 'HTTP404NotFoundError', 'HTTP405MethodNotAllowedError', 'HTTP406NotAcceptableError', 'HTTP407ProxyAuthRequiredError', 'HTTP408RequestTimeoutError', 'HTTP409ConflictError', 'HTTP410GoneError', 'HTTP411LengthRequiredError', 'HTTP412PreconditionFailedError', 'HTTP413PayloadTooLargeError', 'HTTP414URITooLongError', @@ -27,7 +28,7 @@ from urllib.parse import urlencode,urlparse,urlunparse from http.client import InvalidURL -# %% ../nbs/03b_net.ipynb 4 +# %% ../nbs/03b_net.ipynb 5 url_default_headers = { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", @@ -41,7 +42,7 @@ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36" } -# %% ../nbs/03b_net.ipynb 5 +# %% ../nbs/03b_net.ipynb 6 def urlquote(url): "Update url's path with `urllib.parse.quote`" subdelims = "!$&'()*+,;=" @@ -52,31 +53,31 @@ def urlquote(url): for i in range(3,6): p[i] = urllib.parse.quote(p[i], safe=safe) return urlunparse(p) -# %% ../nbs/03b_net.ipynb 8 +# %% ../nbs/03b_net.ipynb 9 def urlwrap(url, data=None, headers=None): "Wrap `url` in a urllib `Request` with `urlquote`" return url if isinstance(url,Request) else Request(urlquote(url), data=data, headers=headers or {}) -# %% ../nbs/03b_net.ipynb 9 +# %% ../nbs/03b_net.ipynb 10 ExceptionsHTTP = {} -# %% ../nbs/03b_net.ipynb 10 +# %% ../nbs/03b_net.ipynb 11 class HTTP4xxClientError(HTTPError): "Base class for client exceptions (code 4xx) from `url*` functions" pass -# %% ../nbs/03b_net.ipynb 11 +# %% ../nbs/03b_net.ipynb 12 class HTTP5xxServerError(HTTPError): "Base class for server exceptions (code 5xx) from `url*` functions" pass -# %% ../nbs/03b_net.ipynb 14 +# %% ../nbs/03b_net.ipynb 15 def urlopener(): _opener = urllib.request.build_opener() _opener.addheaders = list(url_default_headers.items()) return _opener -# %% ../nbs/03b_net.ipynb 15 +# %% ../nbs/03b_net.ipynb 16 # install_opener(_opener) _httperrors = ( @@ -95,10 +96,10 @@ def _init(self, url, hdrs, fp, msg=msg, code=code): HTTP4xxClientError.__init__( cls = type(nm, (HTTP4xxClientError,), {'__init__':_init}) globals()[nm] = ExceptionsHTTP[code] = cls -# %% ../nbs/03b_net.ipynb 16 +# %% ../nbs/03b_net.ipynb 17 _all_ = ['HTTP400BadRequestError', 'HTTP401UnauthorizedError', 'HTTP402PaymentRequiredError', 'HTTP403ForbiddenError', 'HTTP404NotFoundError', 'HTTP405MethodNotAllowedError', 'HTTP406NotAcceptableError', 'HTTP407ProxyAuthRequiredError', 'HTTP408RequestTimeoutError', 'HTTP409ConflictError', 'HTTP410GoneError', 'HTTP411LengthRequiredError', 'HTTP412PreconditionFailedError', 'HTTP413PayloadTooLargeError', 'HTTP414URITooLongError', 'HTTP415UnsupportedMediaTypeError', 'HTTP416RangeNotSatisfiableError', 'HTTP417ExpectationFailedError', 'HTTP418AmAteapotError', 'HTTP421MisdirectedRequestError', 'HTTP422UnprocessableEntityError', 'HTTP423LockedError', 'HTTP424FailedDependencyError', 'HTTP425TooEarlyError', 'HTTP426UpgradeRequiredError', 'HTTP428PreconditionRequiredError', 'HTTP429TooManyRequestsError', 'HTTP431HeaderFieldsTooLargeError', 'HTTP451LegalReasonsError'] -# %% ../nbs/03b_net.ipynb 17 +# %% ../nbs/03b_net.ipynb 18 def urlopen(url, data=None, headers=None, timeout=None, **kwargs): "Like `urllib.request.urlopen`, but first `urlwrap` the `url`, and encode `data`" if kwargs and not data: data=kwargs @@ -110,7 +111,7 @@ def urlopen(url, data=None, headers=None, timeout=None, **kwargs): e.msg += f"\n====Error Body====\n{e.read().decode(errors='ignore')}" raise -# %% ../nbs/03b_net.ipynb 20 +# %% ../nbs/03b_net.ipynb 21 def urlread(url, data=None, headers=None, decode=True, return_json=False, return_headers=False, timeout=None, **kwargs): "Retrieve `url`, using `data` dict or `kwargs` to `POST` if present" try: @@ -123,13 +124,13 @@ def urlread(url, data=None, headers=None, decode=True, return_json=False, return if return_json: res = loads(res) return (res,dict(hdrs)) if return_headers else res -# %% ../nbs/03b_net.ipynb 21 +# %% ../nbs/03b_net.ipynb 22 def urljson(url, data=None, timeout=None): "Retrieve `url` and decode json" res = urlread(url, data=data, timeout=timeout) return json.loads(res) if res else {} -# %% ../nbs/03b_net.ipynb 23 +# %% ../nbs/03b_net.ipynb 24 def urlcheck(url, headers=None, timeout=10): if not url: return True try: @@ -138,12 +139,12 @@ def urlcheck(url, headers=None, timeout=10): except socket.timeout: return False except InvalidURL: return False -# %% ../nbs/03b_net.ipynb 24 +# %% ../nbs/03b_net.ipynb 25 def urlclean(url): "Remove fragment, params, and querystring from `url` if present" return urlunparse(urlparse(str(url))[:3]+('','','')) -# %% ../nbs/03b_net.ipynb 26 +# %% ../nbs/03b_net.ipynb 27 def urlretrieve(url, filename=None, reporthook=None, data=None, headers=None, timeout=None): "Same as `urllib.request.urlretrieve` but also works with `Request` objects" with contextlib.closing(urlopen(url, data, headers=headers, timeout=timeout)) as fp: @@ -169,14 +170,14 @@ def urlretrieve(url, filename=None, reporthook=None, data=None, headers=None, ti raise ContentTooShortError(f"retrieval incomplete: got only {read} out of {size} bytes", headers) return filename,headers -# %% ../nbs/03b_net.ipynb 27 +# %% ../nbs/03b_net.ipynb 28 def urldest(url, dest=None): name = urlclean(Path(url).name) if dest is None: dest = name dest = Path(dest) return dest/name if dest.is_dir() else dest -# %% ../nbs/03b_net.ipynb 28 +# %% ../nbs/03b_net.ipynb 29 def urlsave(url, dest=None, reporthook=None, headers=None, timeout=None): "Retrieve `url` and save based on its name" dest = urldest(url, dest) @@ -184,12 +185,12 @@ def urlsave(url, dest=None, reporthook=None, headers=None, timeout=None): nm,msg = urlretrieve(url, dest, reporthook, headers=headers, timeout=timeout) return nm -# %% ../nbs/03b_net.ipynb 30 +# %% ../nbs/03b_net.ipynb 31 def urlvalid(x): "Test if `x` is a valid URL" return all (getattrs(urlparse(str(x)), 'scheme', 'netloc')) -# %% ../nbs/03b_net.ipynb 32 +# %% ../nbs/03b_net.ipynb 33 def urlrequest(url, verb, headers=None, route=None, query=None, data=None, json_data=True): "`Request` for `url` with optional route params replaced by `route`, plus `query` string, and post `data`" if route: url = url.format(**route) @@ -197,7 +198,7 @@ def urlrequest(url, verb, headers=None, route=None, query=None, data=None, json_ if isinstance(data,dict): data = (json.dumps if json_data else urlencode)(data).encode('ascii') return Request(url, headers=headers or {}, data=data or None, method=verb.upper()) -# %% ../nbs/03b_net.ipynb 35 +# %% ../nbs/03b_net.ipynb 36 @patch def summary(self:Request, skip=None)->dict: "Summary containing full_url, headers, method, and data, removing `skip` from headers" @@ -205,7 +206,7 @@ def summary(self:Request, skip=None)->dict: res['headers'] = {k:v for k,v in self.headers.items() if k not in listify(skip)} return res -# %% ../nbs/03b_net.ipynb 37 +# %% ../nbs/03b_net.ipynb 38 def urlsend(url, verb, headers=None, route=None, query=None, data=None, json_data=True, return_json=True, return_headers=False, debug=None, timeout=None): "Send request with `urlrequest`, converting result to json if `return_json`" @@ -217,7 +218,7 @@ def urlsend(url, verb, headers=None, route=None, query=None, data=None, json_dat return urlread(req, return_json=return_json, return_headers=return_headers, timeout=timeout) -# %% ../nbs/03b_net.ipynb 38 +# %% ../nbs/03b_net.ipynb 39 def do_request(url, post=False, headers=None, **data): "Call GET or json-encoded POST on `url`, depending on `post`" if data: @@ -227,13 +228,13 @@ def do_request(url, post=False, headers=None, **data): data = None return urljson(Request(url, headers=headers, data=data or None)) -# %% ../nbs/03b_net.ipynb 39 +# %% ../nbs/03b_net.ipynb 41 def _socket_det(port,host,dgram): if isinstance(port,int): family,addr = socket.AF_INET,(host or socket.gethostname(),port) else: family,addr = socket.AF_UNIX,port return family,addr,(socket.SOCK_STREAM,socket.SOCK_DGRAM)[dgram] -# %% ../nbs/03b_net.ipynb 40 +# %% ../nbs/03b_net.ipynb 42 def start_server(port, host=None, dgram=False, reuse_addr=True, n_queue=None): "Create a `socket` server on `port`, with optional `host`, of type `dgram`" listen_args = [n_queue] if n_queue else [] @@ -247,10 +248,34 @@ def start_server(port, host=None, dgram=False, reuse_addr=True, n_queue=None): s.listen(*listen_args) return s -# %% ../nbs/03b_net.ipynb 42 +# %% ../nbs/03b_net.ipynb 44 def start_client(port, host=None, dgram=False): "Create a `socket` client on `port`, with optional `host`, of type `dgram`" family,addr,typ = _socket_det(port,host,dgram) s = socket.socket(family, typ) s.connect(addr) return s + +# %% ../nbs/03b_net.ipynb 45 +def tobytes(s:str)->bytes: + "Convert `s` into HTTP-ready bytes format" + return s.replace('\n', '\r\n').encode('utf-8') + +# %% ../nbs/03b_net.ipynb 47 +def http_response(body=None, status=200, hdrs=None, **kwargs): + "Create an HTTP-ready response, adding `kwargs` to `hdrs`" + kwargs = {k.replace('_','-'):v for k,v in kwargs.items()} + hdrs = hdrs or {} + hdrs = {**hdrs, **kwargs} + status_line = f"HTTP/1.1 {status} OK\n" + if body: hdrs['Content-Length'] = len(body) + headers = ''.join([f"{k}: {v}\n" for k, v in hdrs.items()]) + return tobytes(status_line+headers+"\n" + str(body)) + +# %% ../nbs/03b_net.ipynb 49 +@threaded +def recv_once(d:dict, host:str='localhost', port:int=8000): + "Spawn a thread to receive a single HTTP request and store in `d['r']`" + conn,addr = start_server(port,host).accept() + d['r'] = conn.recv(1024) + conn.sendall(http_response(d['r'])) diff --git a/fastcore/xml.py b/fastcore/xml.py index 87fc5968..4a280f1c 100644 --- a/fastcore/xml.py +++ b/fastcore/xml.py @@ -19,7 +19,8 @@ # %% ../nbs/11_xml.ipynb 4 def _attrmap(o): - o = dict(htmlClass='class', cls='class', klass='class', fr='for', htmlFor='for').get(o, o) + o = dict(htmlClass='class', cls='class', _class='class', klass='class', + _for='for', fr='for', htmlFor='for').get(o, o) return o.lstrip('_').replace('_', '-') # %% ../nbs/11_xml.ipynb 5 diff --git a/nbs/03b_net.ipynb b/nbs/03b_net.ipynb index 3127076d..74d191a0 100644 --- a/nbs/03b_net.ipynb +++ b/nbs/03b_net.ipynb @@ -49,6 +49,13 @@ "> Network, HTTP, and URL functions" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## URLs" + ] + }, { "cell_type": "code", "execution_count": null, @@ -642,6 +649,13 @@ " return urljson(Request(url, headers=headers, data=data or None))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Basic client/server" + ] + }, { "cell_type": "code", "execution_count": null, @@ -698,6 +712,70 @@ " return s" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#|export\n", + "def tobytes(s:str)->bytes:\n", + " \"Convert `s` into HTTP-ready bytes format\"\n", + " return s.replace('\\n', '\\r\\n').encode('utf-8')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_eq(tobytes('foo\\nbar'), b'foo\\r\\nbar')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#|export\n", + "def http_response(body=None, status=200, hdrs=None, **kwargs):\n", + " \"Create an HTTP-ready response, adding `kwargs` to `hdrs`\"\n", + " kwargs = {k.replace('_','-'):v for k,v in kwargs.items()}\n", + " hdrs = hdrs or {}\n", + " hdrs = {**hdrs, **kwargs}\n", + " status_line = f\"HTTP/1.1 {status} OK\\n\"\n", + " if body: hdrs['Content-Length'] = len(body)\n", + " headers = ''.join([f\"{k}: {v}\\n\" for k, v in hdrs.items()])\n", + " return tobytes(status_line+headers+\"\\n\" + str(body))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "exp = b'HTTP/1.1 200 OK\\r\\nUser-Agent: me\\r\\nContent-Length: 4\\r\\n\\r\\nbody'\n", + "test_eq(http_response('body', 200, User_Agent='me'), exp)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#|export\n", + "@threaded\n", + "def recv_once(d:dict, host:str='localhost', port:int=8000):\n", + " \"Spawn a thread to receive a single HTTP request and store in `d['r']`\"\n", + " conn,addr = start_server(port,host).accept()\n", + " d['r'] = conn.recv(1024)\n", + " conn.sendall(http_response(d['r']))" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/nbs/11_xml.ipynb b/nbs/11_xml.ipynb index 0acd73a3..8bdc6222 100644 --- a/nbs/11_xml.ipynb +++ b/nbs/11_xml.ipynb @@ -56,7 +56,8 @@ "source": [ "#| export\n", "def _attrmap(o):\n", - " o = dict(htmlClass='class', cls='class', klass='class', fr='for', htmlFor='for').get(o, o)\n", + " o = dict(htmlClass='class', cls='class', _class='class', klass='class',\n", + " _for='for', fr='for', htmlFor='for').get(o, o)\n", " return o.lstrip('_').replace('_', '-')" ] }, @@ -139,7 +140,7 @@ " (['head', (['title', ('Some page',), {}],), {}],\n", " ['body',\n", " (['div',\n", - " (['p', ('Some text',), {}],\n", + " ('Some text',\n", " ['input', (), {'name': 'me'}],\n", " ['img', (), {'src': 'filename'}]),\n", " {'class': 'myclass'}],),\n", @@ -151,7 +152,7 @@ "source": [ "samp = Html(\n", " Head(Title('Some page')),\n", - " Body(Div(P('Some text'), Input(name='me'), Img(src=\"filename\"), klass='myclass'))\n", + " Body(Div('Some text', Input(name='me'), Img(src=\"filename\"), klass='myclass'))\n", ")\n", "pprint(samp)" ] @@ -261,9 +262,7 @@ " \n", " \n", "
\n", - "

\n", "Some text\n", - "

\n", " \n", " \n", "
\n",