2022-07-09 15:51:34 +02:00
'use strict' ;
2022-08-13 22:42:50 +02:00
import { DataLocation , StolenDataEntry } from './stolen-data-entry' ;
2021-11-22 17:54:15 +01:00
import {
2022-04-22 13:00:02 +02:00
flattenObjectEntries ,
getshorthost ,
parseCookie ,
Request ,
safeDecodeURIComponent ,
} from './util' ;
2021-10-03 09:03:56 +02:00
2021-11-09 17:47:42 +01:00
/** A single name/value pair; used below for HAR headers, cookies, query strings and form params. */
type NameValue = {
  name: string;
  value: string;
};
2021-11-08 20:14:28 +01:00
/**
 * Shape of one entry of a HAR (HTTP Archive) log, as produced by
 * `ExtendedRequest.toHAR()` and consumed by `ExtendedRequest.matchesHAREntry()`.
 */
export type HAREntry = {
  pageref: string;
  startedDateTime: string;

  /** The outgoing request. */
  request: {
    bodySize: number;
    cookies: NameValue[];
    headers: NameValue[];
    headersSize: number;
    httpVersion: string;
    method: string;
    /** Optional request body description. */
    postData?: {
      mimeType: string;
      params: (NameValue & {
        fileName: string;
        contentType: string;
        comment: '';
      })[];
      text: string;
    };
    queryString: NameValue[];
    url: string;
  };

  /** The response part of the entry (original author's note: "not relevant"). */
  response: {
    status: number;
    statusText: string;
    httpVersion: string;
    headers: NameValue[];
    cookies: NameValue[];
    content: {
      mimeType: string;
      size: number;
      encoding: 'base64';
      text: string;
    };
    redirectURL: '';
    headersSize: number;
    bodySize: number;
  };

  cache: {};
  timings: {};
  time: number;
  _securityState: string;
  serverIPAddress: string;
  connection: string;
};
2021-10-06 17:22:33 +02:00
/**
 * Patterns for request header names that are skipped when headers are turned
 * into stolen-data entries (see `ExtendedRequest.getHeadersData`).
 *
 * Despite the identifier, the patterns are tested against header names, not
 * cookie names. Matching is case-sensitive.
 */
const whitelisted_cookies: RegExp[] = [
  /^Accept.*$/, // Accept, Accept-Language, Accept-Encoding, ...
  /^Host$/,
  /^Connection$/,
  /^Sec-Fetch-.*$/,
  /^Content-Type$/,
  /^Cookie$/, // we're extracting it in getCookie separately anyway
  /^User-Agent$/,
];
2021-11-26 20:58:31 +01:00
/**
 * Request body as attached to the raw request object (`requestBody` property).
 * Every field is optional; `ExtendedRequest` falls back to `{}` when the
 * property is missing altogether.
 */
type RequestBody = {
  error?: string;
  /** Form fields; each field name maps to a list of values. */
  formData?: Record<string, string[]>;
  /** Raw body parts. */
  raw?: { bytes: ArrayBuffer; file?: string }[];
};
2021-10-03 09:03:56 +02:00
/**
 * Wraps a raw web request (`Request` from `./util`) and derives from it:
 *  - the document the request was made on behalf of (`origin`, `originalHost`,
 *    and - when it can be established - `originalURL` / `originalPathname`),
 *  - every piece of data the request carries (path parameters, cookies, query
 *    parameters, headers, body) as `StolenDataEntry` items.
 *
 * Intended call order: `new ExtendedRequest(data)`, then `addHeaders(...)`,
 * then `init()`. `stolenData` stays empty until `init()` has run.
 */
export default class ExtendedRequest {
  /** Every constructed instance, keyed by the `requestId` of its raw request. */
  static by_id = {} as Record<string, ExtendedRequest>;

  public tabId: number;
  public url: string;
  public shorthost: string;
  public requestHeaders: { name: string; value?: string; binaryValue?: number[] }[] = [];
  public origin: string;
  public initialized = false;
  public stolenData: StolenDataEntry[] = [];
  /**
   * Sometimes we can only establish that the given request applied to a
   * certain origin, not a full URL from the address bar - in case of service
   * workers, for example. Hence the null.
   */
  public originalURL: string | null = null;
  /** Same caveat as `originalURL`: null when only the origin is known. */
  public originalPathname: string | null = null;
  public originalHost: string;
  public requestBody: RequestBody;
  public data: Request;

  /**
   * @param data raw request details; a shallow copy is stored in `this.data`.
   * @throws TypeError (from `new URL`) when no parsable document URL can be
   *   derived from `data`. Note that the instance is already registered in
   *   `by_id` at that point.
   */
  constructor(data: Request) {
    this.tabId = data.tabId;
    this.url = data.url;
    this.shorthost = getshorthost(data.url);
    this.requestBody = ((data as any).requestBody as undefined | RequestBody) || {};
    ExtendedRequest.by_id[data.requestId] = this;

    this.data = Object.assign({}, data);
    // Keep our own list of ancestors holding just their URLs, detached from
    // the array owned by the caller.
    (this.data as any).frameAncestors = [
      ...((data as any).frameAncestors?.map((e: any) => ({ url: e.url })) || []),
    ];

    // Work out which document this request belongs to, from the most to the
    // least reliable source.
    let url: string;
    let isFullUrl = true;
    if (this.data.type === 'main_frame') {
      // The request *is* the top-level navigation.
      url = this.data.url;
    } else if (this.data.frameId === 0 && this.data.documentUrl) {
      // Issued directly by the top-level document.
      url = this.data.documentUrl;
      if (this.data.tabId === -1) {
        // Not tied to a tab - a service worker? Only the origin is reliable.
        isFullUrl = false;
      }
    } else if (
      (this.data as any)?.frameAncestors &&
      (this.data as any).frameAncestors[0] !== undefined
    ) {
      // Issued from a nested frame: take the last listed ancestor.
      url = (this.data as any).frameAncestors.at(-1).url || '';
    } else {
      // Last resort.
      url = this.data.documentUrl || this.data.originUrl;
    }

    const documentUrl = new URL(url); // parsed once; throws on an unusable URL
    this.originalURL = isFullUrl ? url : null;
    this.origin = documentUrl.origin;
    this.originalHost = documentUrl.host;
    this.originalPathname = isFullUrl ? documentUrl.pathname : null;
  }

  /**
   * Stores the request headers (an empty list when `headers` is missing).
   * @returns this instance, for chaining.
   */
  addHeaders(headers: Request['requestHeaders']) {
    this.requestHeaders = headers || [];
    return this;
  }

  /** Marks the request as initialized and collects `stolenData`. */
  init() {
    this.initialized = true;
    this.stolenData = this.getAllStolenData();
  }

  /**
   * Tells whether the request targets a party other than the originating
   * document.
   *
   * NOTE(review): the first check is a plain substring match, so a request
   * host that merely contains `originalHost` (e.g. `notexample.com` vs
   * `example.com`) is also treated as first-party. Left as-is - confirm
   * whether that is intended.
   */
  isThirdParty() {
    const requestUrl = new URL(this.data.url);
    if (requestUrl.host.includes(this.originalHost)) {
      return false;
    }
    if (getshorthost(requestUrl.host) === getshorthost(this.originalHost)) {
      return false;
    }
    // `urlClassification` is not part of the declared `Request` type, so do
    // not assume it is present.
    const thirdPartyFlags = (this.data as any).urlClassification?.thirdParty;
    return requestUrl.origin !== this.origin || (thirdPartyFlags?.length ?? 0) > 0;
  }

  /** @returns the `Referer` header value, or `'missing-referrer'` when absent or empty. */
  getReferer() {
    return this.findHeader('Referer')?.value || 'missing-referrer';
  }

  /**
   * Finds the first place where this request reveals the originating page.
   *
   * The `Referer` header is checked first (against the short host); then each
   * stolen-data value is checked for the host, the pathname or the short host.
   *
   * NOTE(review): when the pathname is unknown or is the site root, `path` is
   * `'/'`, so any value containing a slash counts as a match.
   *
   * @returns the location of the first match, or null when nothing matches.
   */
  exposesOriginWhere(): null | DataLocation {
    const host = this.originalHost;
    const path = this.originalPathname || '/';
    const shorthost = getshorthost(host);
    if (this.getReferer().includes(shorthost)) {
      return { path: this.url, source: 'header', key: 'Referer' };
    }
    const leak = this.stolenData.find(
      (entry) =>
        entry.value.includes(host) || entry.value.includes(path) || entry.value.includes(shorthost)
    );
    return leak ? leak.toDataLocation() : null;
  }

  /** @returns true when `exposesOriginWhere()` finds a match. */
  exposesOrigin() {
    return this.exposesOriginWhere() !== null;
  }

  /** Gathers entries from every source: path, cookies, query string, headers, body. */
  private getAllStolenData(): StolenDataEntry[] {
    return [
      ...this.getPathParams(),
      ...this.getCookieData(),
      ...this.getQueryParams(),
      ...this.getHeadersData(),
      ...this.getRequestBodyData(),
    ];
  }

  /** @returns one entry per (flattened) cookie of the `Cookie` header; empty when there is no such header. */
  getCookieData(): StolenDataEntry[] {
    if (!this.hasCookie()) {
      return [];
    }
    return flattenObjectEntries(
      Object.entries(parseCookie(this.getCookie())).map(([key, value]) => [key, value || '']),
      StolenDataEntry.parseValue
    ).map(([key, value]) => new StolenDataEntry(this, 'cookie', key, value));
  }

  /** @returns entries built from the request body (form fields and raw parts). */
  getRequestBodyData(): StolenDataEntry[] {
    // Raw parts get `<index>.<index>` keys so they can share one object with
    // the form fields.
    const rawParts = Object.fromEntries(
      Object.entries(this.requestBody.raw || {}).map(([key, value], index) => [
        `${key}.${index}`,
        value,
      ])
    );
    return flattenObjectEntries(
      Object.entries({ ...this.requestBody.formData, ...rawParts }).map(([key, value]) => {
        // to handle how ocdn.eu encrypts POST body on https://businessinsider.com.pl/
        // (a field with a single empty value: the field name itself is the payload)
        if ((Array.isArray(value) && value.length === 1 && !value[0]) || !value) {
          return ['requestBody', key];
        } else if (!Array.isArray(value)) {
          // A raw part: expose its bytes as a string, one character per byte.
          return ['raw', ExtendedRequest.bytesToBinaryString(value.bytes)];
        } else {
          return [key, value || ''];
        }
      }),
      StolenDataEntry.parseValue
    ).map(([key, value]) => new StolenDataEntry(this, 'request_body', key, value));
  }

  /**
   * Converts bytes to a string with one UTF-16 code unit per byte.
   * Works in chunks: handing a whole large body to a single
   * `String.fromCharCode` call can exceed the engine's argument limit and
   * throw a RangeError. A missing buffer yields an empty string.
   */
  private static bytesToBinaryString(buffer: ArrayBuffer | undefined): string {
    if (!buffer) {
      return '';
    }
    const bytes = new Uint8Array(buffer);
    const chunkSize = 0x8000;
    let result = '';
    for (let offset = 0; offset < bytes.length; offset += chunkSize) {
      result += String.fromCharCode(...Array.from(bytes.subarray(offset, offset + chunkSize)));
    }
    return result;
  }

  /** Looks a request header up by name; HTTP field names are case-insensitive. */
  private findHeader(name: string) {
    const wanted = name.toLowerCase();
    return this.requestHeaders.find((header) => header.name.toLowerCase() === wanted);
  }

  /** @returns true when a `Referer` header is present. */
  hasReferer() {
    return this.findHeader('Referer') !== undefined;
  }

  /** @returns true when a `Cookie` header is present. */
  hasCookie() {
    return this.findHeader('Cookie') !== undefined;
  }

  /** @returns the `Cookie` header value, or `''` when absent. */
  getCookie(): string {
    return this.findHeader('Cookie')?.value || '';
  }

  /**
   * Extracts `;`-separated `key=value` segments from the URL path.
   * Returns an empty list when the path contains no `;`. Every segment counts,
   * including the part before the first `;` (its value is then empty).
   */
  getPathParams(): StolenDataEntry[] {
    const path = new URL(this.data.url).pathname;
    if (!path.includes(';')) {
      return [];
    }
    return flattenObjectEntries(
      path
        .split(';')
        .map((segment) => {
          // Split on the first '=' only, so values that themselves contain
          // '=' (e.g. base64 padding) are kept whole.
          const separator = segment.indexOf('=');
          return separator === -1
            ? [segment, '']
            : [segment.slice(0, separator), segment.slice(separator + 1)];
        })
        .map(([key, value]) => {
          return [key, StolenDataEntry.parseValue(safeDecodeURIComponent(value))];
        })
    ).map(([key, value]) => new StolenDataEntry(this, 'pathname', key, value));
  }

  /** @returns one entry per (flattened) query-string parameter of the request URL. */
  getQueryParams(): StolenDataEntry[] {
    const url = new URL(this.data.url);
    return flattenObjectEntries(
      (Array.from((url.searchParams as any).entries()) as [string, string][])
        .map(([key, value]: [string, string]) => [key, value || ''])
        .map(([key, value]) => {
          return [key, StolenDataEntry.parseValue(safeDecodeURIComponent(value))];
        })
    ).map(([key, value]) => {
      return new StolenDataEntry(this, 'queryparams', key, value);
    });
  }

  /** @returns entries for every request header whose name matches none of `whitelisted_cookies`. */
  getHeadersData(): StolenDataEntry[] {
    return flattenObjectEntries(
      this.requestHeaders
        .filter((header) => !whitelisted_cookies.some((regex) => regex.test(header.name)))
        .map((header) => {
          return [
            header.name,
            StolenDataEntry.parseValue(safeDecodeURIComponent(header.value || '')),
          ];
        })
    ).map(([key, value]) => new StolenDataEntry(this, 'header', key, value));
  }

  /** @returns true when at least one entry is marked. */
  hasMark() {
    return this.stolenData.some((data) => data.isMarked);
  }

  /** @returns the marked entries. */
  getMarkedEntries() {
    return this.stolenData.filter((data) => data.isMarked);
  }

  /** Calls `unmark()` on every entry. */
  unmarkAllEntries() {
    this.stolenData.forEach((entry) => entry.unmark());
  }

  /** @returns the host (including the port, if any) of the request URL. */
  getHost() {
    return new URL(this.url).host;
  }

  /** @returns true when the HAR entry's request URL equals this request's URL. */
  matchesHAREntry(har: HAREntry): boolean {
    return this.data.url === har.request.url;
  }

  /**
   * Builds a HAR entry for this request. Only the request part reflects real
   * data; the response, timings, IP address and connection are fixed
   * placeholders, and `response.content.size` carries `getBalancedPriority()`.
   */
  toHAR(): HAREntry {
    const bodyEntries = this.stolenData.filter((e) => e.source === 'request_body');
    // A raw part may come without `bytes`; count it as zero bytes instead of throwing.
    const rawByteCount = (this.requestBody.raw || []).reduce(
      (total, part) => total + (part.bytes?.byteLength ?? 0),
      0
    );
    return {
      pageref: 'page_1',
      // The UTC timestamp is emitted with its 'Z' suffix swapped for '+01:00'.
      startedDateTime: new Date().toJSON().replace('Z', '+01:00'),
      request: {
        bodySize: JSON.stringify(this.requestBody.formData || {}).length + rawByteCount,
        method: this.data.method,
        url: this.data.url,
        headersSize: JSON.stringify(this.requestHeaders).length,
        httpVersion: 'HTTP/2',
        headers: this.requestHeaders as NameValue[],
        cookies: this.getCookieData().map((cookie) => ({
          name: cookie.name,
          value: cookie.value,
        })),
        queryString: this.getQueryParams().map((param) => ({
          name: param.name,
          value: param.value,
        })),
        postData: {
          mimeType: 'application/x-www-form-urlencoded',
          params: bodyEntries.map((e) => ({
            name: e.name,
            value: e.value,
            fileName: '--' + Math.ceil(Math.random() * 1000000000),
            contentType: 'text/plain',
            comment: '',
          })),
          text: bodyEntries
            .map((e) => `${e.name}:\t${StolenDataEntry.parseValue(e.value)}`)
            .join('\n\n'),
        },
      },
      response: {
        status: 200,
        statusText: 'OK',
        httpVersion: 'HTTP/2',
        headers: [],
        cookies: [],
        content: {
          mimeType: 'text/plain',
          size: this.getBalancedPriority(),
          encoding: 'base64',
          text: 'ZG9lc24ndCBtYXR0ZXIK', // base64 of "doesn't matter\n"
        },
        redirectURL: '',
        headersSize: 15,
        bodySize: 15,
      },
      cache: {},
      timings: {
        blocked: -1,
        dns: 0,
        connect: 0,
        ssl: 0,
        send: 0,
        wait: 79,
        receive: 0,
      },
      time: 79,
      _securityState: 'secure',
      serverIPAddress: '31.13.92.36',
      connection: '443',
    };
  }

  /**
   * @returns the highest `getPriority()` among the entries; `-Infinity` when
   *   there are none (same as `Math.max()` with no arguments).
   */
  getMaxPriority(): number {
    return this.stolenData.reduce(
      (highest, entry) => Math.max(highest, entry.getPriority()),
      -Infinity
    );
  }

  /**
   * Scores the request by summing fixed weights: +50 when any entry exposes
   * the path, +50 when any exposes the host, +50 when a cookie header is
   * present, +300 when any entry is classified as `'location'`, +50 when the
   * request URL contains `'facebook'`.
   */
  getBalancedPriority(): number {
    const weightedSignals: [boolean, number][] = [
      [this.stolenData.some((e) => e.exposesPath()), 50],
      [this.stolenData.some((e) => e.exposesHost()), 50],
      [this.hasCookie(), 50],
      [this.stolenData.some((e) => e.classification === 'location'), 300],
      [this.url.includes('facebook'), 50],
    ];
    return weightedSignals.reduce((sum, [present, weight]) => (present ? sum + weight : sum), 0);
  }
}