Skip to content

Commit

Permalink
Merge pull request #4 from russellsteadman:dev/botPost
Browse files Browse the repository at this point in the history
Add post/put/delete functionality to netscrape
  • Loading branch information
russellsteadman authored Oct 13, 2023
2 parents a9e9fd7 + 73a1b12 commit 3075d53
Show file tree
Hide file tree
Showing 6 changed files with 185 additions and 16 deletions.
31 changes: 31 additions & 0 deletions packages/bot/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,37 @@ try {
}
```

### Bot#makeRequestWithBody

```ts
import Bot from 'netscrape';

const exampleBot = new Bot({ name: 'ExampleBot', version: '1.0' });

try {
/* Note: Bot#makeRequestWithBody automatically requests /robots.txt in the background */
/* Note: Bot#makeRequestWithBody automatically sets the
Content-Type header to application/json or text/plain based on the body type */

const response = await exampleBot.makeRequestWithBody(
'https://www.example.com/path' /* required, well-formatted URL to make request to */,
{
example: 'body',
} /* required, body to send to server (string or object) */,
{
'x-example-header': 'example header',
} /* optional, headers to send to server */,
'POST' /* optional, HTTP method to use: 'POST', 'PUT', or 'DELETE' (default 'POST') */,
);

/* Bot#makeRequestWithBody returns the raw npm.im/got package request response */
console.log(response.body);
} catch (error) {
/* Robots.txt rejection, robots.txt 500 error, etc. */
console.error(error);
}
```

## License

MIT (C) 2023 [Russell Steadman](https://github.com/russellsteadman). See LICENSE file. Visit [Google
Expand Down
2 changes: 1 addition & 1 deletion packages/bot/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "netscrape",
"version": "0.2.0",
"version": "0.3.0",
"description": "A structural framework for creating good bots",
"author": "Russell Steadman",
"license": "MIT",
Expand Down
73 changes: 58 additions & 15 deletions packages/bot/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import CacheableLookup from 'cacheable-lookup';
import * as Errors from './errors.js';
import { RobotsTxt } from 'exclusion';
import got, { Options, type Request, type Response } from 'got';
import { normalizeHeaders } from './utilities.js';

export type BotOptions = {
name: string;
Expand Down Expand Up @@ -119,13 +120,13 @@ class Bot {
];
}

private fetchURL(
private fetchURL<T extends unknown>(
rawURL: string,
options?: BotRequestOptions & { stream?: false },
): Promise<Response<string>>;
): Promise<Response<T>>;
private fetchURL(
rawURL: string,
asStream: BotRequestOptions & { stream: true },
options: BotRequestOptions & { stream: true },
): Promise<Request>;
private async fetchURL(
rawURL: string,
Expand All @@ -135,18 +136,11 @@ class Bot {
const url = new URL(rawURL);

// Initialize the headers
const standardHeaders = { ...options?.headers };

// Convert all headers to lowercase
for (const key of Object.keys(standardHeaders)) {
if (key !== key.toLowerCase()) {
standardHeaders[key.toLowerCase()] = standardHeaders[key];
delete standardHeaders[key];
}
}
const standardHeaders = normalizeHeaders(options?.headers ?? {});

// Initialize the default request options
const defaultOptions: Partial<Options> = {
method: 'GET',
timeout: {
lookup: 6e4,
socket: 6e4,
Expand Down Expand Up @@ -189,15 +183,15 @@ class Bot {
}

// Fetch the URL as a string if requested
return got.get(rawURL, {
return got(rawURL, {
...defaultOptions,
responseType: 'text',
}) as Request | Response<string>;
}

private async fetchRobotsTxt(origin: string) {
// Fetch the robots.txt
const robotsTxt = await this.fetchURL(`${origin}/robots.txt`, {
const robotsTxt = await this.fetchURL<string>(`${origin}/robots.txt`, {
overrides: { throwHttpErrors: false },
});

Expand Down Expand Up @@ -287,7 +281,56 @@ class Bot {
}

// Fetch the URL as a string if requested
return this.fetchURL(rawURL);
return this.fetchURL<string>(rawURL);
}

/**
 * Send a request that carries a body, after checking the target against
 * the origin's robots.txt rules and waiting for the per-origin request delay.
 *
 * String bodies are sent as-is with `text/plain`; any other body is
 * serialized with `JSON.stringify` and sent as `application/json`.
 *
 * @param rawURL - Absolute URL to send the request to; parsed with `new URL`.
 * @param body - String sent verbatim, or an object that is JSON-serialized.
 * @param headers - Extra request headers. Names are lowercased before use.
 *   `content-type` may be overridden here; `content-length` may not, because
 *   it is always derived from the body that is actually sent.
 * @param method - HTTP method to use, `'POST'` by default.
 * @returns The response produced by the internal `fetchURL` call. `T` is
 *   asserted by the caller; this method does not validate the body against it.
 * @throws Errors.RobotsRejection when robots.txt disallows the path for this bot.
 */
async makeRequestWithBody<T extends unknown>(
  rawURL: string,
  body: string | Record<string, any>,
  headers?: Record<string, string>,
  method: 'POST' | 'PUT' | 'DELETE' = 'POST',
): Promise<Response<T>> {
  // Parse URL
  const url = new URL(rawURL);

  // Get the robots.txt for the origin
  await this.fetchRobotsTxt(url.origin);

  // Check if the path (including the query string) is allowed for this bot
  const allowedByRobots = this.robotsTxt[url.origin].isPathAllowed(
    `${url.pathname}${url.search}`,
    this.botName,
  );

  // If not allowed, throw a rejection
  if (!allowedByRobots) {
    throw new Errors.RobotsRejection('Request blocked by robots.txt');
  }

  // Wait for the required delay
  await this.waitForRequestDelay(url.origin);

  // Format the body as a string
  const formattedBody =
    typeof body === 'string' ? body : JSON.stringify(body);

  // Send the request with the serialized body
  return this.fetchURL<T>(rawURL, {
    headers: {
      // Default content type, derived from the kind of body supplied
      'content-type':
        typeof body !== 'string' ? 'application/json' : 'text/plain',

      // Caller headers come next, so content-type can be overridden
      ...normalizeHeaders(headers ?? {}),

      // Applied last on purpose: the length must describe the bytes that are
      // really sent, so a caller-supplied value can never desynchronise it
      'content-length': Buffer.byteLength(formattedBody).toString(),
    },
    overrides: {
      method,
      body: formattedBody,
    },
  });
}
}

Expand Down
16 changes: 16 additions & 0 deletions packages/bot/src/test/_server.util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,34 @@ export function createServer(
requests.push(req);
res.send('Home');
});
app.post('/', (req, res) => {
requests.push(req);
res.send('Home POST');
});
app.get('/a', (req, res) => {
requests.push(req);
res.send('A');
});
app.post('/a', (req, res) => {
requests.push(req);
res.send('A POST');
});
app.get('/a/b', (req, res) => {
requests.push(req);
res.send('B');
});
app.post('/a/b', (req, res) => {
requests.push(req);
res.send('B POST');
});
app.get('/a/b/c', (req, res) => {
requests.push(req);
res.send('C');
});
app.post('/a/b/c', (req, res) => {
requests.push(req);
res.send('C POST');
});
app.get('/robots.txt', (req, res) => {
requests.push(req);

Expand Down
66 changes: 66 additions & 0 deletions packages/bot/src/test/bot.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,69 @@ test('RFC 9309 2.3.1.4: Responds to 500 errors', async (it) => {
},
);
});

// End-to-end check of Bot#makeRequestWithBody against the local test server:
// verifies the response bodies of the POST routes and the content-type /
// content-length headers the server receives for string, overridden and
// object bodies.
test('Bot#makeRequestWithBody()', async (it) => {
const bot = new Bot({ name: 'Test', version: '0.1' });
// robots.txt served by the test server allows every path for every agent
const start = await startServer('User-agent: *\nAllow: /');
it.teardown(async () => await stopServer(start.server));

it.is(typeof bot.makeRequestWithBody, 'function');

// String body, no explicit method: each route should answer from its POST handler
let res = await bot.makeRequestWithBody(
`http://127.0.0.1:${start.port}/`,
'Hello, world!',
);
it.is(res.body, 'Home POST');

res = await bot.makeRequestWithBody(
`http://127.0.0.1:${start.port}/a`,
'Hello, world!',
);

it.is(res.body, 'A POST');

res = await bot.makeRequestWithBody(
`http://127.0.0.1:${start.port}/a/b`,
'Hello, world!',
);

it.is(res.body, 'B POST');

res = await bot.makeRequestWithBody(
`http://127.0.0.1:${start.port}/a/b/c`,
'Hello, world!',
);

it.is(res.body, 'C POST');

// NOTE(review): index 1 presumably is the first POST to '/', with index 0
// being the robots.txt fetch that precedes it; confirm against the server util
let req = start.requests[1];
it.is(req.method, 'POST');
it.is(req.headers['content-type'], 'text/plain');
// 'Hello, world!' is 13 bytes
it.is(req.headers['content-length'], '13');

// A caller-supplied Content-Type (any casing) replaces the text/plain default
res = await bot.makeRequestWithBody(
`http://127.0.0.1:${start.port}/`,
'<p></p>',
{ 'Content-Type': 'text/html' },
);

// NOTE(review): index 5 assumes one robots.txt request plus the four POSTs
// above, i.e. robots.txt is not re-requested per call; confirm
req = start.requests[5];

it.is(res.body, 'Home POST');

it.is(req.method, 'POST');
it.is(req.headers['content-type'], 'text/html');
// '<p></p>' is 7 bytes
it.is(req.headers['content-length'], '7');

// Object body: expected to arrive as JSON
res = await bot.makeRequestWithBody(`http://127.0.0.1:${start.port}/`, {
hello: 'world',
});

req = start.requests[6];

it.is(res.body, 'Home POST');

it.is(req.method, 'POST');
it.is(req.headers['content-type'], 'application/json');
// JSON.stringify({ hello: 'world' }) is '{"hello":"world"}', 17 bytes
it.is(req.headers['content-length'], '17');
});
13 changes: 13 additions & 0 deletions packages/bot/src/utilities.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
 * Return a shallow copy of `headers` in which every header name is lowercase.
 *
 * The input object is never modified. When a name containing uppercase
 * characters collides with an entry that is already lowercase, the value of
 * the mixed-case entry is the one that is kept.
 *
 * @param headers - Header names mapped to their values.
 * @returns A new object with lowercase header names.
 */
export const normalizeHeaders = (headers: Record<string, string>) => {
  // Work on a copy so the caller's object is left untouched
  const lowered = { ...headers };

  // Only names that are not already lowercase need to be moved
  const mixedCaseNames = Object.keys(lowered).filter(
    (name) => name.toLowerCase() !== name,
  );

  mixedCaseNames.forEach((name) => {
    lowered[name.toLowerCase()] = lowered[name];
    delete lowered[name];
  });

  return lowered;
};

0 comments on commit 3075d53

Please sign in to comment.