rentgen/extended-request.ts

419 lines
14 KiB
TypeScript
Raw Normal View History

2022-07-09 15:51:34 +02:00
'use strict';
2022-08-13 22:42:50 +02:00
import { DataLocation, StolenDataEntry } from './stolen-data-entry';
2021-11-22 17:54:15 +01:00
import {
2022-04-22 13:00:02 +02:00
flattenObjectEntries,
getshorthost,
parseCookie,
Request,
safeDecodeURIComponent,
} from './util';
2021-10-03 09:03:56 +02:00
/** A single name/value pair, as used for HAR headers, cookies and query-string items. */
type NameValue = {
    name: string;
    value: string;
};
2021-11-08 20:14:28 +01:00
/**
 * Shape of one HAR (HTTP Archive) entry as produced by `ExtendedRequest.toHAR()` and
 * compared against in `ExtendedRequest.matchesHAREntry()`.
 */
export type HAREntry = {
    pageref: string;
    startedDateTime: string;
    /** The request half of the entry. */
    request: {
        bodySize: number;
        cookies: NameValue[];
        headers: NameValue[];
        headersSize: number;
        httpVersion: string;
        method: string;
        postData?: {
            mimeType: string;
            params: (NameValue & {
                fileName: string;
                contentType: string;
                comment: '';
            })[];
            text: string;
        };
        queryString: NameValue[];
        url: string;
    };
    /** The response half of the entry; not relevant (kept from the original annotation). */
    response: {
        status: number;
        statusText: string;
        httpVersion: string;
        headers: NameValue[];
        cookies: NameValue[];
        content: {
            mimeType: string;
            size: number;
            encoding: 'base64';
            text: string;
        };
        redirectURL: '';
        headersSize: number;
        bodySize: number;
    };
    cache: {};
    timings: {};
    time: number;
    _securityState: string;
    serverIPAddress: string;
    connection: string;
};
/**
 * Patterns of request-header names that `ExtendedRequest.getHeadersData()` skips.
 * Despite the identifier, every pattern is tested against a header name, not a cookie name.
 * The patterns are case-sensitive.
 */
const whitelisted_cookies = [
    /^Accept.*$/,
    /^Host$/,
    /^Connection$/,
    /^Sec-Fetch-.*$/,
    /^Content-Type$/,
    /^Cookie$/, // we're extracting it in getCookie separately anyway
    /^User-Agent$/,
];
2021-11-26 20:58:31 +01:00
/**
 * Body of an intercepted request as read from the `requestBody` property of the request
 * details: either decoded form fields, raw byte chunks, or an error message.
 */
type RequestBody = {
    error?: string;
    /** Form fields; each key maps to the list of values submitted under it. */
    formData?: Record<string, string[]>;
    /** Raw upload parts. */
    raw?: { bytes: ArrayBuffer; file?: string }[];
};
2021-10-03 09:03:56 +02:00
/**
 * One intercepted browser request, enriched with the page ("original") URL it was issued
 * from and with the list of data entries (`stolenData`) found in its path, cookies, query
 * string, headers and body.
 *
 * Every constructed instance registers itself in `ExtendedRequest.by_id` under the
 * request id found in the request details.
 */
export default class ExtendedRequest {
    public tabId: number;
    public url: string;
    public shorthost: string;
    public requestHeaders: { name: string; value?: string; binaryValue?: number[] }[] = [];
    public origin: string;
    public initialized = false;
    public stolenData: StolenDataEntry[] = [];
    // Sometimes we can only establish that the given request applied to a certain origin,
    // not a full URL from the address bar - in case of service workers, for example.
    // Hence the null.
    public originalURL: string | null = null;
    public originalPathname: string | null = null; // same as above
    public originalHost: string;
    public requestBody: RequestBody;
    /** All instances created so far, keyed by `data.requestId`. */
    static by_id = {} as Record<string, ExtendedRequest>;
    public data: Request;

    /**
     * Copies the request details and works out which page URL the request belongs to.
     *
     * The page URL is taken, in order of preference, from: the request URL itself (for
     * `main_frame` requests), `documentUrl` (for top-frame requests), the last entry of
     * `frameAncestors`, and finally `documentUrl || originUrl`.
     *
     * @param data request details as delivered by the browser
     * @throws TypeError when the chosen page URL cannot be parsed by `URL`
     */
    constructor(data: Request) {
        this.tabId = data.tabId;
        this.url = data.url;
        this.shorthost = getshorthost(data.url);
        this.requestBody = ((data as any).requestBody as undefined | RequestBody) || {};
        ExtendedRequest.by_id[data.requestId] = this;
        this.data = Object.assign({}, data);

        // Keep our own copy of the ancestor list, reduced to just the URLs.
        const ancestors: { url?: string }[] = ((data as any)?.frameAncestors || []).map(
            (e: any) => ({ url: e.url })
        );
        (this.data as any).frameAncestors = ancestors;

        let url: string;
        let is_full_url = true;
        if (this.data.type === 'main_frame') {
            url = this.data.url;
        } else if (this.data.frameId === 0 && this.data.documentUrl) {
            url = this.data.documentUrl;
            if (this.data.tabId === -1) {
                // a service worker? Only the origin is trustworthy then, not the full URL.
                is_full_url = false;
            }
        } else if (ancestors[0] !== undefined) {
            const outermost = ancestors[ancestors.length - 1];
            url = outermost?.url || '';
        } else {
            // last resort
            url = this.data.documentUrl || this.data.originUrl;
        }

        const parsed = new URL(url);
        this.originalURL = is_full_url ? url : null;
        this.origin = parsed.origin;
        this.originalHost = parsed.host;
        this.originalPathname = is_full_url ? parsed.pathname : null;
    }

    /**
     * Stores the request headers (an empty list when none are given).
     *
     * @returns this instance, to allow chaining
     */
    addHeaders(headers: Request['requestHeaders']) {
        this.requestHeaders = headers || [];
        return this;
    }

    /** Marks the request as initialized and collects all of its data entries. */
    init() {
        this.initialized = true;
        this.stolenData = this.getAllStolenData();
    }

    /**
     * Tells whether the request goes to a different party than the page it came from.
     *
     * The request is first-party when its host equals the page host, is a subdomain of it,
     * or shares the page's short host (as computed by `getshorthost`).
     *
     * The host comparison respects label boundaries: `shop.example.com.attacker.net` is
     * NOT treated as belonging to `shop.example.com`, which a plain substring check would
     * do.
     */
    isThirdParty(): boolean {
        const request_url = new URL(this.data.url);
        const requestHost = request_url.host;
        if (
            requestHost === this.originalHost ||
            requestHost.endsWith('.' + this.originalHost)
        ) {
            return false;
        }
        if (getshorthost(requestHost) === getshorthost(this.originalHost)) {
            return false;
        }
        // `urlClassification` is not provided by every browser, so it is read defensively.
        const flaggedThirdParty =
            ((this.data as any).urlClassification?.thirdParty?.length ?? 0) > 0;
        return request_url.origin !== this.origin || flaggedThirdParty;
    }

    /** Value of the Referer header, or the string `'missing-referrer'` when absent/empty. */
    getReferer() {
        return this.findHeader('Referer')?.value || 'missing-referrer';
    }

    /**
     * Finds where (if anywhere) this request reveals the page it was made from.
     *
     * Checks the Referer header for the page's short host first, then every data entry
     * for the page host, the page path or the short host.
     *
     * A page path that is unknown or just `/` is not used as a needle: it would match any
     * value containing a slash.
     *
     * @returns the location of the first match, or null when nothing matches
     */
    exposesOriginWhere(): null | DataLocation {
        const host = this.originalHost;
        const shorthost = getshorthost(host);
        const path =
            this.originalPathname !== null && this.originalPathname !== '/'
                ? this.originalPathname
                : null;
        if (this.getReferer().includes(shorthost)) {
            return { path: this.url, source: 'header', key: 'Referer' };
        }
        for (const entry of this.stolenData) {
            if (
                entry.value.includes(host) ||
                (path !== null && entry.value.includes(path)) ||
                entry.value.includes(shorthost)
            ) {
                return entry.toDataLocation();
            }
        }
        return null;
    }

    /** True when `exposesOriginWhere()` finds a match. */
    exposesOrigin() {
        return this.exposesOriginWhere() !== null;
    }

    /** Collects entries from the path, cookies, query string, headers and body, in that order. */
    private getAllStolenData(): StolenDataEntry[] {
        return [
            ...this.getPathParams(),
            ...this.getCookieData(),
            ...this.getQueryParams(),
            ...this.getHeadersData(),
            ...this.getRequestBodyData(),
        ];
    }

    /** One entry per cookie found in the Cookie header; empty when there is no such header. */
    getCookieData(): StolenDataEntry[] {
        if (!this.hasCookie()) {
            return [];
        }
        return flattenObjectEntries(
            Object.entries(parseCookie(this.getCookie())).map(([key, value]) => [key, value || '']),
            StolenDataEntry.parseValue
        ).map(([key, value]) => new StolenDataEntry(this, 'cookie', key, value));
    }

    /**
     * Entries extracted from the request body: form fields plus raw upload parts.
     * Raw parts are decoded byte-by-byte into a string (one character per byte).
     */
    getRequestBodyData(): StolenDataEntry[] {
        const rawParts = Object.fromEntries(
            Object.entries(this.requestBody.raw || {}).map(([key, value], index) => [
                `${key}.${index}`,
                value,
            ])
        );
        const pairs = Object.entries({ ...this.requestBody.formData, ...rawParts }).map(
            ([key, value]) => {
                // to handle how ocdn.eu encrypts POST body on https://businessinsider.com.pl/
                if ((Array.isArray(value) && value.length === 1 && !value[0]) || !value) {
                    return ['requestBody', key];
                } else if (!Array.isArray(value)) {
                    return ['raw', ExtendedRequest.bytesToBinaryString(value.bytes)];
                } else {
                    return [key, value || ''];
                }
            }
        );
        return flattenObjectEntries(pairs, StolenDataEntry.parseValue).map(
            ([key, value]) => new StolenDataEntry(this, 'request_body', key, value)
        );
    }

    /** True when a Referer header is present (header names compared case-insensitively). */
    hasReferer() {
        return this.findHeader('Referer') !== undefined;
    }

    /** True when a Cookie header is present (header names compared case-insensitively). */
    hasCookie() {
        return this.findHeader('Cookie') !== undefined;
    }

    /** Value of the Cookie header, or an empty string when it is absent or has no value. */
    getCookie(): string {
        return this.findHeader('Cookie')?.value || '';
    }

    /**
     * Entries for `;`-separated `key=value` segments of the URL path. Returns an empty
     * list when the path contains no `;`. The part of the path before the first `;` is
     * also reported, as a key with an empty value.
     *
     * Each segment is split at its FIRST `=` only, so values that themselves contain `=`
     * (e.g. base64 padding) are kept whole.
     */
    getPathParams(): StolenDataEntry[] {
        const path = new URL(this.data.url).pathname;
        if (!path.includes(';')) {
            return [];
        }
        return flattenObjectEntries(
            path
                .split(';')
                .map((segment): [string, string] => {
                    const eq = segment.indexOf('=');
                    return eq === -1
                        ? [segment, '']
                        : [segment.slice(0, eq), segment.slice(eq + 1)];
                })
                .map(([key, value]) => {
                    return [key, StolenDataEntry.parseValue(safeDecodeURIComponent(value))];
                })
        ).map(([key, value]) => new StolenDataEntry(this, 'pathname', key, value));
    }

    /** Entries for every parameter in the URL's query string. */
    getQueryParams(): StolenDataEntry[] {
        const url = new URL(this.data.url);
        const params = Array.from((url.searchParams as any).entries()) as [string, string][];
        return flattenObjectEntries(
            params
                .map(([key, value]: [string, string]) => [key, value || ''])
                .map(([key, value]) => {
                    return [key, StolenDataEntry.parseValue(safeDecodeURIComponent(value))];
                })
        ).map(([key, value]) => {
            return new StolenDataEntry(this, 'queryparams', key, value);
        });
    }

    /** Entries for every request header whose name matches none of `whitelisted_cookies`. */
    getHeadersData(): StolenDataEntry[] {
        return flattenObjectEntries(
            this.requestHeaders
                .filter((header) => !whitelisted_cookies.some((regex) => regex.test(header.name)))
                .map((header) => {
                    return [
                        header.name,
                        StolenDataEntry.parseValue(safeDecodeURIComponent(header.value || '')),
                    ];
                })
        ).map(([key, value]) => new StolenDataEntry(this, 'header', key, value));
    }

    /** True when at least one data entry is marked. */
    hasMark() {
        return this.stolenData.some((data) => data.isMarked);
    }

    /** All data entries that are marked. */
    getMarkedEntries() {
        return this.stolenData.filter((data) => data.isMarked);
    }

    /** Calls `unmark()` on every data entry. */
    unmarkAllEntries() {
        this.stolenData.forEach((entry) => entry.unmark());
    }

    /** Host (including port, if any) of the request URL. */
    getHost() {
        return new URL(this.url).host;
    }

    /** True when the HAR entry's request URL equals this request's URL. */
    matchesHAREntry(har: HAREntry): boolean {
        return this.data.url === har.request.url;
    }

    /**
     * Builds a HAR entry for this request.
     *
     * Only the request part is derived from real data; the response, timings, security
     * state, server address and connection are fixed placeholder values, and the response
     * content size is set to `getBalancedPriority()`.
     */
    toHAR(): HAREntry {
        const bodyEntries = this.stolenData.filter((e) => e.source === 'request_body');
        const rawBytes = (this.requestBody.raw || [])
            .map((e) => e.bytes.byteLength)
            .reduce((a, b) => a + b, 0);
        return {
            pageref: 'page_1',
            // NOTE(review): this relabels the UTC timestamp as +01:00 without shifting the
            // clock value - kept as-is; confirm it is intentional.
            startedDateTime: new Date().toJSON().replace('Z', '+01:00'),
            request: {
                bodySize: JSON.stringify(this.requestBody.formData || {}).length + rawBytes,
                method: this.data.method,
                url: this.data.url,
                headersSize: JSON.stringify(this.requestHeaders).length,
                httpVersion: 'HTTP/2',
                headers: this.requestHeaders as NameValue[],
                cookies: this.getCookieData().map((cookie) => ({
                    name: cookie.name,
                    value: cookie.value,
                })),
                queryString: this.getQueryParams().map((param) => ({
                    name: param.name,
                    value: param.value,
                })),
                postData: {
                    mimeType: 'application/x-www-form-urlencoded',
                    params: bodyEntries.map((e) => ({
                        name: e.name,
                        value: e.value,
                        fileName: '--' + Math.ceil(Math.random() * 1000000000),
                        contentType: 'text/plain',
                        comment: '',
                    })),
                    text: bodyEntries
                        .map((e) => `${e.name}:\t${StolenDataEntry.parseValue(e.value)}`)
                        .join('\n\n'),
                },
            },
            response: {
                status: 200,
                statusText: 'OK',
                httpVersion: 'HTTP/2',
                headers: [],
                cookies: [],
                content: {
                    mimeType: 'text/plain',
                    size: this.getBalancedPriority(),
                    encoding: 'base64',
                    text: 'ZG9lc24ndCBtYXR0ZXIK',
                },
                redirectURL: '',
                headersSize: 15,
                bodySize: 15,
            },
            cache: {},
            timings: {
                blocked: -1,
                dns: 0,
                connect: 0,
                ssl: 0,
                send: 0,
                wait: 79,
                receive: 0,
            },
            time: 79,
            _securityState: 'secure',
            serverIPAddress: '31.13.92.36',
            connection: '443',
        };
    }

    /**
     * Highest `getPriority()` among the data entries.
     * Returns `-Infinity` when there are no entries.
     */
    getMaxPriority(): number {
        return this.stolenData.reduce(
            (highest, entry) => Math.max(highest, entry.getPriority()),
            -Infinity
        );
    }

    /**
     * Additive score: +50 if any entry exposes the path, +50 if any exposes the host,
     * +50 if a Cookie header is present, +300 if any entry is classified as `'location'`,
     * +50 if the request URL contains `'facebook'`.
     */
    getBalancedPriority(): number {
        const entries = this.stolenData;
        const bonuses: [boolean, number][] = [
            [entries.some((e) => e.exposesPath()), 50],
            [entries.some((e) => e.exposesHost()), 50],
            [this.hasCookie(), 50],
            [entries.some((e) => e.classification === 'location'), 300],
            [this.url.includes('facebook'), 50],
        ];
        return bonuses.reduce((sum, [applies, points]) => (applies ? sum + points : sum), 0);
    }

    /** Looks a request header up by name; header names are compared case-insensitively. */
    private findHeader(name: string) {
        const wanted = name.toLowerCase();
        return this.requestHeaders.find((h) => h.name.toLowerCase() === wanted);
    }

    /**
     * Turns a byte buffer into a string with one character per byte.
     * Works in chunks, because handing a very large array to a single
     * `String.fromCharCode.apply` call exceeds the engine's argument limit.
     */
    private static bytesToBinaryString(buffer: ArrayBuffer): string {
        const bytes = new Uint8Array(buffer);
        const CHUNK = 0x8000;
        const parts: string[] = [];
        for (let offset = 0; offset < bytes.length; offset += CHUNK) {
            parts.push(
                String.fromCharCode.apply(
                    null,
                    Array.from(bytes.subarray(offset, offset + CHUNK))
                )
            );
        }
        return parts.join('');
    }
}