/*
 * OpenAnonymity XML Producer
 * Author: Mathias Kimpl matl@aon.at
 * Version:
 * Published under GNU GPL
 *
 * THIS FILE IS ONLY FOR DEMONSTRATION PURPOSES -
 * DOWNLOAD THE LATEST VERSION FROM http://sourceforge.net/projects/openanonymity
 *
 * Not solved or tested so far:
 */
/*
   http://apr.apache.org/docs/apr-util/group__APR__Util__XML.html
   http://apr.apache.org/docs/apr/group__APR__Table.html#a8
*/
#include <stdlib.h>
#include <string.h>

#include "httpd.h"
#include "http_config.h"
#include "http_log.h"
#include "http_request.h"
#include "apr_xml.h"
#include "apr_lib.h"
#include "apr_strings.h"

#include <dbi/dbi.h>

//The default values for the Directives in httpd.conf
#define DEFAULT_XMLFILE_NAME "openanonymity.xml"
#define DEFAULT_STOP_PATTERN ""
#define DEFAULT_DB_CONNECT 1
#define DEFAULT_DB_HOST "localhost"
#define DEFAULT_DB_NAME "openanonymity"
#define DEFAULT_DB_TYPE "pgsql"
#define DEFAULT_DB_DRIVER_DIR "/usr/lib/dbd/"
#define DEFAULT_DB_USERNAME "opan01"
#define DEFAULT_DB_PASSWORD "opan01"

//default for initial Array Size
#define INITIAL_ARRAY_SIZE 20

//Linux dont know strcmpi, but strcasecmp
#ifndef Win32
#define strcmpi strcasecmp
#endif

//Declare the name of this module
module AP_MODULE_DECLARE_DATA opan_xml_producer_module;

//Prototypes of all used functions (check if static is what i want)
static int searchBucket(const char*,int,int,apr_array_header_t*, ap_filter_t *);
static void dump_xml(apr_xml_elem *, int,request_rec *);
static apr_xml_elem * getBranch(apr_xml_elem * , char * ,char * );
static apr_status_t fillAnonymizeListFromXMLNode(apr_array_header_t * , apr_xml_elem *);
static void q_sort(apr_array_header_t * , int , int );
static int isInList(char ,apr_array_header_t * ,int ,int,ap_filter_t *);
static void normalizeIntArray(apr_array_header_t *,ap_filter_t *f);
static void correlateAnonymizeLists(apr_array_header_t *, apr_array_header_t *, ap_filter_t *);
static void trimDirectoryAnonymizeList(apr_array_header_t *, apr_array_header_t *,ap_filter_t *);
apr_status_t makeDBConnect(dbi_conn **,
ap_filter_t *); //This modules per-server configuration structure. typedef struct { char *opanXMLFileName; //"openanonymity.xml" char *stopPattern; // char *db_host; //"localhost" char *db_type; //"pgsql" or "mysql" char *db_username; //"opan01" char *db_password; //"opan01" char *db_name; //"openanonymity" char *db_driverdir; //"/usr/lib/dbd/" int db_connect_switch; //on | off, 1|0 } open_anonymity_xml_producer_config; /*_______GLOBALS_______________*/ static int contentMatchPos; //indicates where a match was made in the content string, TEMPORARY GLOBAL, change it SOON /* * This modules 'brain', to save data till the next processing (within one request?) */ typedef struct open_anonymity_xml_producer_ctx_struct_t { int state; char *serverPathWithQuery; char *pathTranslated; apr_array_header_t *posArray; apr_array_header_t *directoryAnonymizeList; //Array for the List Items of Tag List in openanonymity.xml apr_xml_doc *doc; //XML Doc apr_file_t *fd; //Filepointer to openanonymity.xml file } open_anonymity_xml_producer_ctx_struct; /* * This function is registered as a handler for HTTP methods and will * therefore be invoked for all GET requests (and others). 
* */ static apr_status_t mod_open_anonymity_xml_producer_method_handler (ap_filter_t *f, apr_bucket_brigade *bb) { /*__________INIT_PARAMS_MODULE_RELATED_______________________________________________________________*/ open_anonymity_xml_producer_ctx_struct *ctx = f->ctx; //the context of this module apr_bucket *e; //the current bucket const char *reqFileSrvPath; //the part of the URI after the host: www.opan.org/index.html -> /index.html //char *pathTranslated; //char *serverPathWithQuery; const char *requestFileName; // the name of the requested File const char *relativeReqFilePath; /*__________INIT_PARAMS_XML_FILE_RELATED_____________________________________________________________*/ apr_status_t rv; //Status Var to check errors when opening files etc apr_xml_elem *anonymizeList; //XML Element - One Node within the Doc apr_xml_elem *tmpXMLElement; //XML Element - One Node within the Doc apr_xml_elem *newXMLElement; //XML Element - One Node within the Doc apr_text_header* newXMLElementTextHeader; apr_text* newText; apr_array_header_t *fileAnonymizeList; //Array for the List Items of Tag List in openanonymity.xml char ** listElem; ///One item of the anonym-list-array // Get the module configuration open_anonymity_xml_producer_config *s_cfg = ap_get_module_config(f->r->server->module_config, &opan_xml_producer_module); //only for testing, act only on specified browser, otherwise stop open anonymity const char *ua; //User-Agent Identifier ua = apr_table_get(f->r->headers_in, "User-Agent"); if (ua && (strncmp(ua, "Opera", 5) == 0)) { ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): User Agent is %s",ua); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," This module will stop processing"); ap_remove_output_filter(f); return ap_pass_brigade(f->next, bb); } ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Directive Data"); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," XML File Name: %s",s_cfg->opanXMLFileName); 
ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," Stop Pattern: %s",s_cfg->stopPattern); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," DB Host: %s",s_cfg->db_host); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," DB Type: %s",s_cfg->db_type); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," DB Username: %s",s_cfg->db_username); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," DB Password: %s",s_cfg->db_password); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," DB Name: %s",s_cfg->db_name); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," DB Driverdir: %s",s_cfg->db_driverdir); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," DB Connect Switch: %d",s_cfg->db_connect_switch); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," Canonical Filename: %s",f->r->parsed_uri.path); /*__________BEGIN_FUNCTIONALITY_____________________________________________________________________*/ //never been called so far, so init the context if (ctx == NULL) { ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Init ctx"); f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx)); } //Initialisation only for the first brigade within this request //Database connect, Request Parameter Processing, file Open //with a test with a file of 19MB i never got a second call of mod_open_anonymity_xml_producer_method_handler //but with php i got one. 
It passes the EOS Bucket in a seperate BB if (ctx->state == 0) { ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): First Call (ctx->state==0)"); char * pos; ctx->state = 1; //apr_table_unset(f->r->headers_out, "Content-Length");//checken, ob danach richtig setzen ctx->posArray = apr_array_make(f->r->pool, INITIAL_ARRAY_SIZE, sizeof(int *));//Initialize the Position Array for later use //_________________________________________________________________GET REQUEST PARAMETER reqFileSrvPath = f->r->uri; // should be something like "/index.html" ctx->pathTranslated= f->r->filename; //absolute path to requested file like "/usr/local/share/apache2/htdocs/" requestFileName = apr_filename_of_pathname(f->r->canonical_filename);// Filename of Request like "index.html" //Get the relative path of this request (not the absolute path), later used for DB Query relativeReqFilePath = apr_pstrdup(f->r->pool,reqFileSrvPath); //duplicate the string pos = strstr(relativeReqFilePath, requestFileName); //search for filename (e.g index.html) in filepath (e.g /index.html) *pos = '\0';//now terminate the string at this position //now in relativeReqFilePath is only the relative path to the file if(f->r->parsed_uri.query == NULL){ //if we dont have a query in the uri (index.php?id=2) ctx->serverPathWithQuery = apr_pstrcat(f->r->pool,reqFileSrvPath,NULL); } else{ ctx->serverPathWithQuery = apr_pstrcat(f->r->pool,reqFileSrvPath,"?",f->r->parsed_uri.query,NULL); } //Set the end of this path to position before Filename- so cut out the path // like "/usr/local/share/apache2/htdocs/ ctx->pathTranslated[(strlen(ctx->pathTranslated)-strlen(requestFileName))] = '\0'; //Log the Request Params ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Request Params"); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," Content Type: %s",f->r->content_type); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," Server Path: %s",reqFileSrvPath); ap_log_rerror(APLOG_MARK, 
APLOG_WARNING, 0, f->r," ServerPath with Query: %s",ctx->serverPathWithQuery); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," Absolute Path: %s",ctx->pathTranslated); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," Relative Path: %s",relativeReqFilePath); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," Filename: %s",requestFileName); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," Canonical Filename: %s",f->r->canonical_filename); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," Canonical Filename: %s",f->r->parsed_uri.path); //READ OPENANONYMITY.XML FILE and BUILD a LIST WITH ALL ANONYM WORDS for this folder //Open File openanonymity.xml for read const char * opanFilePath; opanFilePath = apr_pstrcat(f->r->pool,ctx->pathTranslated,s_cfg->opanXMLFileName,NULL); rv = apr_file_open(&(ctx->fd),opanFilePath, APR_WRITE | APR_READ, APR_OS_DEFAULT, f->r->pool); if (rv != APR_SUCCESS) { ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Could not open xml file (%s)",opanFilePath); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," This module stops executing, please check the xml File (if exist, rights...)"); ap_remove_output_filter(f);//remove this filter from this request, otherwise i will get a segfault //because of wrong usage of ctx->state == 0 return ap_pass_brigade(f->next, bb); //return DECLINED; } //Parse File openanonymity.xml means produce internal representation of xml apr_xml_parser *parser; //xml Parser to parse openanonymity.xml rv = apr_xml_parse_file(f->r->pool, &parser, &(ctx->doc), ctx->fd, 2000); if (rv != APR_SUCCESS) { char errbuf[2000];//Error Buffer String for error messages char errbufXML[2000]; //Error Buffer String for error messages ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, "OPAN (XML Producer): Parse error in XML File APR Error: %s XML Error: %s", apr_strerror(rv, errbuf, sizeof(errbuf)), apr_xml_parser_geterror(parser, errbufXML, sizeof(errbufXML))); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," 
This module stops executing, please validate the xml File."); ap_remove_output_filter(f);//remove this filter from this request, otherwise i will get a segfault //because of wrong usage of ctx->state == 0 return ap_pass_brigade(f->next, bb); } //This list is used to store all anonymize words either from DB or XML File //Type is an array of pointers to Strings(so to char *) ctx->directoryAnonymizeList = apr_array_make(f->r->pool, INITIAL_ARRAY_SIZE, sizeof(char **)); //Get the Config Data of the openanonymity.xml file apr_xml_elem *configElement; //Config Node configElement = getBranch(ctx->doc->root,"config",""); //Get list with all words to anonymize of the Config Data anonymizeList = getBranch(configElement,"list",""); /*_______DATABASE_UPDATE_FUNCTIONALITY________________________________*/ if(s_cfg->db_connect_switch == 1){ //then we want to make an update of openanonymity.xml with the Database //ONLY IF THE TIMESTAMP IS CORRECT..............MAKE SOON dbi_conn conn; dbi_result result; const char ** arrayElem; //Connect to Database, update the internal used list of anonym words ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): make Database connect"); rv = makeDBConnect(&conn, f);//pass the adress of this conn if (rv == APR_SUCCESS) { result = dbi_conn_query(conn, "SELECT anonymize FROM anonymizelist where dir = '%s'",relativeReqFilePath); while (dbi_result_next_row(result)) { char* tmpstr; arrayElem = apr_array_push(ctx->directoryAnonymizeList);//get an element tmpstr = apr_pstrdup(f->r->pool,(char *)dbi_result_get_string(result, "anonymize")); //duplicate the string, otherwise it is away after DB close *arrayElem = tmpstr; /* ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Database access data"); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," LibDbi Version %s", dbi_version()); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," Type of Field anonymize: %d", dbi_result_get_field_type(result,"anonymize")); 
ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," Number of Fields in Row: %d", dbi_result_get_numfields(result)); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," Size of Field: %d", dbi_result_get_field_size(result, "anonymize")); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," Content of Field: %s", dbi_result_get_string(result, "anonymize")); */ } //_________CREATE THE NEW NODEs of this anonymize words out of the db in openanonymity.xml, throw away the old listElem = (char **) ctx->directoryAnonymizeList->elts;//set again to first Elem //combinedAnonymizeListElement = (char **) directoryAnonymizeList->elts; //posArrayElement = (int *) posArray->elts; int i; for(i=0;i < ctx->directoryAnonymizeList->nelts;i++){ char * str; //insert the rest of the array entries as XML Element into openanonymity.xml //create a new xml element newXMLElement = apr_pcalloc(f->r->pool, sizeof(apr_xml_elem)); newXMLElement->ns = -10;//means no namespace newXMLElement->name="listitem"; newXMLElementTextHeader = apr_pcalloc(f->r->pool, sizeof(apr_text_header)); newText = apr_pcalloc(f->r->pool, sizeof(apr_text)); newText->text =(char *)listElem[i]; newXMLElementTextHeader->first = newText; newXMLElement->first_cdata = *newXMLElementTextHeader; newXMLElement->parent=anonymizeList; if(i==0){ //only for the first element anonymizeList->first_child=newXMLElement; } else{ anonymizeList->last_child->next=newXMLElement; } anonymizeList->last_child = newXMLElement; } dbi_result_free(result); dbi_conn_close(conn); dbi_shutdown(); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): shutted down Database connect"); //Now delete the old per file anonymize list in openanonymity.xml and create a new one with the words of the db } else{//DB Connect was not successfull ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Could not make Database Connect"); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," This module stops executing"); 
ap_remove_output_filter(f);//remove this filter from this request, otherwise i will get a segfault return ap_pass_brigade(f->next, bb); } } /*_______END_DATABASE_UPDATE_FUNCTIONALITY________________________________*/ else{ //take the anonymize words of the xml file //Fill the Array=List with the data of the xml file rv = fillAnonymizeListFromXMLNode(ctx->directoryAnonymizeList,anonymizeList); if (rv != APR_SUCCESS) { ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Error while filling the internal array representation of this folders anonymize words"); } } //Sort the Array q_sort(ctx->directoryAnonymizeList,0,ctx->directoryAnonymizeList->nelts-1); //now i have a sorted array with all Listitems out of the openanonymity.xml file or DB }//END IF ctx->state == 0 else{ ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Second Call (ctx->state!=0)"); } //one bucket seems to be 4194304 Bytes for static HTML Files //go through all buckets(e) of this bucket-brigade(bb) APR_BRIGADE_FOREACH(e, bb) { const char *data; //holds the bucket data apr_size_t bucketLength; //Length of Bucket int matchPos=-1; //Match Pos is the Position in Anonym-List-Array of the match word int searchOffset=0; ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): For each Bucket"); //get a new dynamic array from apache, initial size is INITIAL_ARRAY_SIZE, array holds the Position //of all Patterns, destroy it each time a new bucket began if (APR_BUCKET_IS_EOS(e)) { //This is the last processed Bucket within the whole Request! 
ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Bucket EOS"); //go through all Matches and update the openanonymity.xml file ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Kill double entries in the Int Array (# %d)",ctx->posArray->nelts); normalizeIntArray(ctx->posArray,f);//means, kill double entries in the POSITION ARRAY - not in anonym-list-array //delete the unused elements in directoryAnonymList, should be clearer that after that the //array isnt a directory-Anonym List, but a FileAnonymize List ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Kill unused entries in the Anonymize Array (# %d)",ctx->directoryAnonymizeList->nelts); trimDirectoryAnonymizeList(ctx->posArray, ctx->directoryAnonymizeList,f); listElem = (char **) ctx->directoryAnonymizeList->elts;//set again to first Elem //Create an array for the currently used anonym items of this page as defined in openanonymity.xml fileAnonymizeList = apr_array_make(f->r->pool, INITIAL_ARRAY_SIZE, sizeof(char **)); //____________GET THE LIST NODE OF THIS PAGE. 
If not exist, create one____________________________ //get the link-Node of this page, then access the father, next access the list-node tmpXMLElement = getBranch(ctx->doc->root,"link",ctx->serverPathWithQuery); if(tmpXMLElement != NULL){ //means, this is not a request to an unknown page ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): The requested page is known (Node Name = %s)",tmpXMLElement->name); tmpXMLElement = tmpXMLElement->parent;//father of link = page newXMLElement = getBranch(tmpXMLElement,"list",""); if(newXMLElement == NULL){ //there is currently no list elem in this page node,->create one newXMLElement = apr_pcalloc(f->r->pool, sizeof(apr_xml_elem)); newXMLElement->name="list"; newXMLElement->ns = -10;//means no namespace newXMLElement->parent = tmpXMLElement;//this page node is child of root tmpXMLElement->last_child->next = newXMLElement; tmpXMLElement->last_child = newXMLElement; } tmpXMLElement = newXMLElement;//necessary for further processing //Whole rest of IF Part is SENSELESS //this makes only sense when manual editing of per file anonymize words is correct. but it isnt //fill new array with the currently used anonym tags of this page or url //rv = fillAnonymizeListFromXMLNode(fileAnonymizeList,tmpXMLElement); //if (rv != APR_SUCCESS) { //ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"Open Anonymity XML Producer: Error filling the array data of currently used anonym tags"); //} //kill entries in the directoryAnonymizeList array that already exists in openanonymity.xml //correlateAnonymizeLists(directoryAnonymizeList, fileAnonymizeList,f); ...SENSELESS!!!!????? 
} else{ //this current request isnt known in openanonymity.xml apr_xml_elem *tmp2XMLElement; //XML Element - One Node within the Doc //create the new Node(s) page(link,list) ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): The requested page is unknown"); newXMLElement = apr_pcalloc(f->r->pool, sizeof(apr_xml_elem)); newXMLElement->name="page"; newXMLElement->ns = -10;//means no namespace newXMLElement->parent=ctx->doc->root;//this page node is child of root ctx->doc->root->last_child->next=newXMLElement; ctx->doc->root->last_child = newXMLElement; //create link element tmp2XMLElement = apr_pcalloc(f->r->pool, sizeof(apr_xml_elem)); tmp2XMLElement->name="link"; tmp2XMLElement->ns =-10;//means no namespace newXMLElementTextHeader = apr_pcalloc(f->r->pool, sizeof(apr_text_header)); newText = apr_pcalloc(f->r->pool, sizeof(apr_text)); newText->text = (char *)ctx->serverPathWithQuery; newXMLElementTextHeader->first = newText; tmp2XMLElement->first_cdata = *newXMLElementTextHeader; tmp2XMLElement->parent=newXMLElement;//this link node is child of page newXMLElement->first_child=tmp2XMLElement; //create list node tmpXMLElement = apr_pcalloc(f->r->pool, sizeof(apr_xml_elem)); tmpXMLElement->name="list"; tmpXMLElement->ns = -10;//means no namespace tmpXMLElement->parent=newXMLElement;//this list node is child of page newXMLElement->first_child->next = tmpXMLElement; newXMLElement->last_child = tmpXMLElement; } //_________CREATE THE NEW NODEs of this File Request's anonymize words in openanonymity.xml listElem = (char **) ctx->directoryAnonymizeList->elts;//set again to first Elem //combinedAnonymizeListElement = (char **) directoryAnonymizeList->elts; //posArrayElement = (int *) posArray->elts; int i; ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Create New/Updated List of anonymize words (# %d)",ctx->directoryAnonymizeList->nelts); for(i=0;i < ctx->directoryAnonymizeList->nelts;i++){ //insert the rest of the array entries as 
XML Element into openanonymity.xml //create a new xml element ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): %d: %s ",i,(char *)listElem[i]); newXMLElement = apr_pcalloc(f->r->pool, sizeof(apr_xml_elem)); newXMLElement->ns = -10;//means no namespace newXMLElement->name="anonymize"; newXMLElementTextHeader = apr_pcalloc(f->r->pool, sizeof(apr_text_header)); newText = apr_pcalloc(f->r->pool, sizeof(apr_text)); newText->text = (char *)listElem[i]; newXMLElementTextHeader->first = newText; newXMLElement->first_cdata = *newXMLElementTextHeader; newXMLElement->parent=tmpXMLElement; if(i==0){ //only for the first element tmpXMLElement->first_child=newXMLElement; //if((i+1) == ctx->directoryAnonymizeList->nelts){ //if there is only one element //tmpXMLElement->first_child->next=newXMLElement; //} } else{ tmpXMLElement->last_child->next=newXMLElement; } tmpXMLElement->last_child = newXMLElement; } ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Write the file"); const char *text;//for changing the xml tree to text //ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"NameSpace: %s %d",APR_XML_GET_URI_ITEM(doc->namespaces, 0),doc->namespaces->nelts); apr_xml_to_text(f->r->pool, ctx->doc->root, APR_XML_X2T_FULL_NS_LANG, ctx->doc->namespaces, NULL, &text, NULL); //ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"NameSpace: %s %d",APR_XML_GET_URI_ITEM(doc->namespaces, 0),doc->namespaces->nelts); apr_size_t xmlFileLength; //hold the length of the openanonymity.xml file xmlFileLength=strlen(text); //ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Output: %s",text); apr_off_t fileOffset = 0; apr_file_seek(ctx->fd,APR_SET,&fileOffset);//reset to beginning apr_file_write(ctx->fd,text,&xmlFileLength); apr_file_trunc(ctx->fd,xmlFileLength); //truncate the file, maybe there is more apr_file_close(ctx->fd); //continue;//EOS is the last bucket, should stop this FOREACH }// END EOS else{ //This is any other BUCKET 
ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Any Bucket"); // read the bucket data apr_bucket_read(e, &data, &bucketLength, APR_BLOCK_READ); contentMatchPos=0; //initialize global variable - only TEMPORARELY //LOGGING________________________________ listElem = (char **) ctx->directoryAnonymizeList->elts;//set again to first Elem int i; for(i=0;i < ctx->directoryAnonymizeList->nelts;i++){ ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," Listitem # %d: %s",i,(char *)listElem[i]); } //LOGGING________________________________ //went through this bucket(e) and search for all ANONYM WORDS in the LISTARRAY, remember array positions do{ ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"OPAN (XML Producer): Searching for anonymize words..."); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r," Size of Anonymize List: %d",ctx->directoryAnonymizeList->nelts); matchPos = searchBucket(data,bucketLength,searchOffset,ctx->directoryAnonymizeList, f);//search for in anonym-list if(matchPos!=-1){//one exact match found int *currentPos; //holds the current Position of anonym words in array currentPos = apr_array_push(ctx->posArray);//get new int * from the array to store List-Array Position of this match *currentPos = matchPos; //remember Array Position of this match listElem = (char **) ctx->directoryAnonymizeList->elts; searchOffset=contentMatchPos + strlen((char *)listElem[matchPos]);//next loop, begin searching after last match } }while (matchPos != -1);//do till no more anonym words found } }// END APR_BRIGADE_FOREACH ap_pass_brigade(f->next, bb); return APR_SUCCESS; } /* searchBucket is to search str for the first occurence of one of the patterns in the array, starting * at Position Offset. 
Because str is not terminated by EOS, also the maximum * length is delivered.It passes back the position when a PATTERN was found, * otherwise -1 * const char* str: The string to search in * int len: The maximum length to search * int searchOffset: Where to start within str * apr_array_header_t* patternList: An Array with all Search Patterns * request_rec * r: ONLY FOR LOGGING PURPOSES - DELETE * * Currently Known Bugs: * Problem with anonymize words "rimbert" and "rimbert.rudisch@fh-hagenberg.at", stops searching at "rimbert" */ static int searchBucket(const char* str,int bucketLength, int searchOffset,apr_array_header_t* patternList, ap_filter_t *f){ char contentChar; //holds each single character of bucket content char contentCharPreview; //used to make a preview from the current char unsigned int matchCounter=0;//count the matching characters int i=0,j=0,k=0; // loop counter variables int stop = 1; //to leave inner for int isInListval=0; //Position of the match in the anonym-list-array char ** listElem; //One item of the array //go through the whole string str by each single character for(i=searchOffset;ielts; //We have one more char match when we dont get -1. Second Condition is to check if we have a whole word match if(isInListval != -1 && strlen((char *)listElem[isInListval]) > (matchCounter+2)){ matchCounter++; } else{ //maybe we dont found another char-match, maybe we have an exact match stop=0;//leave this inner for, should try break; } k++; } if(isInListval != -1){ //listElem = (char **) patternList->elts; if((matchCounter+2) == strlen((char *)listElem[isInListval])){ //then we have found one whole PATTERN, return position within array contentMatchPos=i; return isInListval; } } } } return -1; //means pattern not found in whole string } /* getBranch searches for the occurence of one Node with name nodeName within the tree with root node * Works recursive, breadth first. 
(Optional it also compares the Text of the Node with nodeText) * apr_xml_elem * node: The Node to start searching * char * nodeName: The name of the searched Node * returns: The Pointer to the found Node */ static apr_xml_elem * getBranch(apr_xml_elem * node, char * nodeName, char * nodeText){ apr_xml_elem *childNode; //Child Nodes of node apr_xml_elem *matchNode; //matchNode, if there is one if (node->first_child) { //if node has a child childNode = node->first_child; while (childNode) { if(strcmp(childNode->name,nodeName)==0){ if(strcmp(nodeText,"")!=0){//if we should also check the text if(childNode->first_cdata.first){//check if listitem has cdata if(strcmp(childNode->first_cdata.first->text,nodeText)==0){//if text matches return childNode; } } } else{ return childNode; } } childNode = childNode->next; } //no match on this level, lets get deeper childNode = node->first_child; while (childNode) { matchNode = getBranch(childNode,nodeName,nodeText);//RECURSION //stop directly when a match occurs if(matchNode){ if(strcmp(nodeText,"")!=0){//if we should also check the text if(matchNode->first_cdata.first){//check if listitem has cdata if(strcmp(matchNode->first_cdata.first->text,nodeText)==0){//if text matches return matchNode; } } } else{ return matchNode; } } childNode = childNode->next; } return NULL; } else{ return NULL;//no match } } /* fillAnonymizeListFromXMLNode gets a node (name= list) with childnode listitems. 
* the content of the listitems is written to the array * apr_array_header_t * array: The array to fill * apr_xml_elem * listNode: The Root Node * The structure could look like this * * Kimpl * Mathias * Mathias Kimpl * mathias.kimpl@fh-hagenberg.at * matl@aon.at-neu * rimbert.rudisch@fh-hagenberg.at * */ static apr_status_t fillAnonymizeListFromXMLNode(apr_array_header_t * array, apr_xml_elem * listNode ){ apr_xml_elem *childNode; //child Node of listNode, so one listitem const char ** arrayElem; //One Element within the array //termination, when something is wrong if(listNode == NULL) {return -1;} //no node if(strcmp(listNode->name,"list")!=0){ //wrong name of Node return -1; } //go through the listitems if (listNode->first_child) { //if node has a child childNode = listNode->first_child; while (childNode) { if(childNode->first_cdata.first){//check if listitem has cdata arrayElem = apr_array_push(array);//get an element *arrayElem = childNode->first_cdata.first->text; //store text } childNode = childNode->next; //go to next listitem } } return 0; //0...everythings fine } /*sorts the passed array alphabetically * with Quick Sort Algorithm from * http://linux.wku.edu/~lamonml/algor/sort/quick.html */ static void q_sort(apr_array_header_t * array, int left, int right) { int l_hold, r_hold; char * pivot; int pivotInt; const char ** arrayElem; //One Element within the array arrayElem = (char **) array->elts; l_hold = left; r_hold = right; pivot = (char *) arrayElem[left]; while (left < right) { //while ((arrayElem[right] >= pivot) && (left < right)) while ((strcmpi(arrayElem[right],pivot)>=0) && (left < right)) right--; if (left != right) { arrayElem[left] = arrayElem[right]; left++; } //while ((arrayElem[left] <= pivot) && (left < right)) while ((strcmpi(arrayElem[left],pivot)<=0) && (left < right)) left++; if (left != right) { arrayElem[right] = arrayElem[left]; right--; } } arrayElem[left] = pivot; pivotInt = left; left = l_hold; right = r_hold; if (left < pivotInt) 
q_sort(array, left, pivotInt-1); if (right > pivotInt) q_sort(array, pivotInt+1, right); } //should be extended to an effective search algo /* isInList searches for character ch in the array patternList (anonym-tag-list). It * starts at arrayPos, in that array-element at charPos. If it founds a match, it returns * the matching array Position, otherwise -1 * * ch: the character to look for * * patternList: the array to look at * arrayPos: the specific array position to start search * charPos: The POsition within the specific array-element to start search * r: ONLY FOR LOGGING PURPOSES, DELETE * returns: * -1 ......... no char found * arrayPos.... Position within the array where the next match was found */ static int isInList(char ch,apr_array_header_t * patternList,int arrayPos,int charPos, ap_filter_t *f){ int i; char ** listElem = (char **) patternList->elts; for(i=arrayPos;inelts;i++){ if(listElem[i][charPos] == ch) { return i; } //not found at this position of array, go to next arrayItem, //first check if still the former char is correct e.g Math and Kath, pos a -> Stop if(charPos!=0 && i<(patternList->nelts-1)){ if(listElem[i][charPos-1] != listElem[i+1][charPos-1]){//check if end within this char is reached return -1; } } } return -1; } /* normalizeIntArray searches for double entries in posArray and eliminates them * by making a new array out of the pool provided by f, and swaps the two arrays. 
* */ static void normalizeIntArray(apr_array_header_t *posArray, ap_filter_t *f){ int *posArrayElement; int *newposArrayElement; int i,j; int alreadyExists=0; int *newIntVal_p; apr_array_header_t *newList = apr_array_make(f->r->pool, INITIAL_ARRAY_SIZE, sizeof(int *)); request_rec *r = f->r; posArrayElement = (int *) posArray->elts; newposArrayElement = (int *) newList->elts; newIntVal_p = apr_array_push(newList); *newIntVal_p = posArrayElement[0]; for(i=1;inelts;i++){ alreadyExists=0; for(j=0;jnelts;j++){ if(newposArrayElement[j]==posArrayElement[i]){ alreadyExists =1; } } if(!alreadyExists){ newIntVal_p = apr_array_push(newList); *newIntVal_p = posArrayElement[i]; } } *posArray = *newList;//throw away the old array } /* normalizeCharArray searches for double entries in posArray and eliminates them * by making a new array out of the pool provided by f, and swaps the two arrays. * after that it produces a new */ static void normalizeCharArray(apr_array_header_t *combinedAnonymizeList, ap_filter_t *f){ char** combinedAnonymizeListElement; char** newArrayElement; int i,j; int alreadyExists=0; char** newChar_p; apr_array_header_t *newListArray = apr_array_make(f->r->pool, INITIAL_ARRAY_SIZE, sizeof(char **)); //request_rec *r = f->r; combinedAnonymizeListElement = (char **) combinedAnonymizeList->elts; newArrayElement = (char **) newListArray->elts; newChar_p = apr_array_push(newListArray); *newChar_p = combinedAnonymizeListElement[0]; for(i=1;inelts;i++){ alreadyExists=0; for(j=0;jnelts;j++){ if(strcmp((char *)newArrayElement[j],(char *)combinedAnonymizeListElement[i])==0){ alreadyExists =1; } } if(!alreadyExists){ newChar_p = apr_array_push(newListArray); *newChar_p = combinedAnonymizeListElement[i]; } } *combinedAnonymizeList = *newListArray;//throw away the old array } /* correlateAnonymizeLists: This method gets a list of unique anonymize words which appeared * in the request. 
it also gets a list of anonymize words currently in the openanonymity.xml or db for this folder, * and trust that this list has also unique entries. * This method copies the list entries of the second list to the end of the first. Then it * passes this list with all entries to method normalizeCharArray who eliminates the double entries */ static void correlateAnonymizeLists(apr_array_header_t *directoryAnonymizeList, apr_array_header_t *fileAnonymizeList, ap_filter_t *f){ char ** listElem; const char ** arrayElem; //One Element within the array int i; listElem = (char **) fileAnonymizeList->elts; for(i=0;inelts;i++){ arrayElem = apr_array_push(directoryAnonymizeList);//get an element *arrayElem = listElem[i]; //store text } normalizeCharArray(directoryAnonymizeList,f); } /* trimDirectoryAnonymizeList: deletes the unused elements in directoryAnonymizeList * After this method, in directoryAnonymizeList are only elements (anonymize words) that appeared in the request * */ static void trimDirectoryAnonymizeList(apr_array_header_t *posArray, apr_array_header_t *directoryAnonymizeList,ap_filter_t *f){ char** oldlistElem; int * posArrayElement; char** newChar_p; int i; apr_array_header_t *newListArray = apr_array_make(f->r->pool, INITIAL_ARRAY_SIZE, sizeof(char **)); posArrayElement = (int *) posArray->elts; oldlistElem = (char **) directoryAnonymizeList->elts; //copy all oldListElements as defined with posArray into newList for(i=0;inelts;i++){ newChar_p = apr_array_push(newListArray); *newChar_p = oldlistElem[posArrayElement[i]]; } *directoryAnonymizeList = *newListArray;//throw away the old array } /* makeDBConnect connects to the Database specified in httpd.conf * Params: conn: a pointer to the pointer of the Connection, dereference with * to give it back to caller * ap_filter_t ... 
to write logs */ apr_status_t makeDBConnect(dbi_conn **conn, ap_filter_t *f){ int numdrivers; const char *errmsg; open_anonymity_xml_producer_config *s_cfg = ap_get_module_config(f->r->server->module_config, &opan_xml_producer_module); numdrivers = dbi_initialize(s_cfg->db_driverdir); if (numdrivers < 0) { ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, "Unable to initialize libdbi! Make sure you specified a valid driver directory."); dbi_shutdown(); return -1; } else if (numdrivers == 0) { ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, "Initialized libdbi, but no drivers were found!\n"); dbi_shutdown(); return -1; } if ((*conn = dbi_conn_new(s_cfg->db_type)) == NULL) { ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"Can't instantiate driver into a dbi_conn!"); dbi_shutdown(); return -1; } dbi_conn_set_option(*conn, "host", s_cfg->db_host); dbi_conn_set_option(*conn, "username", s_cfg->db_username); dbi_conn_set_option(*conn, "password", s_cfg->db_password); dbi_conn_set_option(*conn, "dbname", s_cfg->db_name); if (dbi_conn_connect(*conn) < 0) { dbi_conn_error(*conn, &errmsg); ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,"Unable to connect! Error message: %s", errmsg); dbi_shutdown(); return -1; } return APR_SUCCESS; } /*___________________________SET_CONFIGURATION_DIRECTIVES______________________________________________________________*/ /** * This function is called when the "XMLFileName" configuration directive is parsed. */ static const char *set_xmlFileName(cmd_parms *parms, void *mconfig, const char *arg) { // get the module configuration (this is the structure created by create_modtut2_config()) open_anonymity_xml_producer_config *s_cfg = ap_get_module_config(parms->server->module_config, &opan_xml_producer_module); // make a duplicate of the argument's value using the command parameters pool. 
s_cfg->opanXMLFileName = (char *) arg; // success return NULL; } /** * This function is called when the "OpenAnonymitySearchPatternStop" configuration directive is parsed. */ static const char *set_stop_pattern(cmd_parms *parms, void *mconfig, const char *arg) { // get the module configuration (this is the structure created by create_modtut2_config()) open_anonymity_xml_producer_config *s_cfg = ap_get_module_config(parms->server->module_config, &opan_xml_producer_module); // make a duplicate of the argument's value using the command parameters pool. s_cfg->stopPattern = (char *) arg; // success return NULL; } /** * This function is called when the "DBLocalize" configuration directive is parsed. */ static const char *set_db_localize(cmd_parms *parms, void *mconfig, const char *arg1, const char *arg2) { // get the module configuration (this is the structure created by create_modtut2_config()) open_anonymity_xml_producer_config *s_cfg = ap_get_module_config(parms->server->module_config, &opan_xml_producer_module); // make a duplicate of the argument's value using the command parameters pool. s_cfg->db_host = (char *) arg1; //e.g. localhost s_cfg->db_name = (char *) arg2; //e.g. name of database "openanonymity" // success return NULL; } /** * This function is called when the "LibDbiControl" configuration directive is parsed. */ static const char *set_libdbi_control(cmd_parms *parms, void *mconfig, const char *arg1, const char *arg2) { // get the module configuration (this is the structure created by create_modtut2_config()) open_anonymity_xml_producer_config *s_cfg = ap_get_module_config(parms->server->module_config, &opan_xml_producer_module); // make a duplicate of the argument's value using the command parameters pool. s_cfg->db_type = (char *) arg1; //e.g. pgsql s_cfg->db_driverdir = (char *) arg2; //e.g. /usr/lib/dbd/ // success return NULL; } /** * This function is called when the "DbAuthorization" configuration directive is parsed. 
*/ static const char *set_db_authorization(cmd_parms *parms, void *mconfig, const char *arg1, const char *arg2) { // get the module configuration (this is the structure created by create_modtut2_config()) open_anonymity_xml_producer_config *s_cfg = ap_get_module_config(parms->server->module_config, &opan_xml_producer_module); // make a duplicate of the argument's value using the command parameters pool. s_cfg->db_username = (char *) arg1; //e.g. opan01 s_cfg->db_password = (char *) arg2; //e.g. opan01 // success return NULL; } /** * This function is called when the "DbAuthorization" configuration directive is parsed. */ static const char *set_db_connect(cmd_parms *parms, void *mconfig, int on) { // get the module configuration (this is the structure created by create_modtut2_config()) open_anonymity_xml_producer_config *s_cfg = ap_get_module_config(parms->server->module_config, &opan_xml_producer_module); // make a duplicate of the argument's value using the command parameters pool. s_cfg->db_connect_switch = (int) on; //e.g. 0....off 1....on // success return NULL; } /*___________________________CONTROL MODULE HANDLING_________________________________________________________________*/ /* * This function is a callback and it declares what other functions * should be called for request processing and configuration requests. * This callback function declares the Handlers for other events. */ static void mod_open_anonymity_xml_producer_register_hooks (apr_pool_t *p) { ap_register_output_filter("OPANXMLPRODUCER", mod_open_anonymity_xml_producer_method_handler,NULL, AP_FTYPE_CONTENT_SET); } /** * Creates the per-server (default) configuration records. */ static void *create_open_anonymity_xml_producer_config(apr_pool_t *p, server_rec *s) { open_anonymity_xml_producer_config *newcfg; // allocate space for the configuration structure from the provided pool p. 
newcfg = (open_anonymity_xml_producer_config *) apr_pcalloc(p, sizeof(open_anonymity_xml_producer_config)); // set the default value for the search pattern string. newcfg->opanXMLFileName = DEFAULT_XMLFILE_NAME; newcfg->stopPattern = DEFAULT_STOP_PATTERN; newcfg->db_host = DEFAULT_DB_HOST; newcfg->db_type = DEFAULT_DB_TYPE; newcfg->db_username = DEFAULT_DB_USERNAME; newcfg->db_password = DEFAULT_DB_PASSWORD; newcfg->db_name = DEFAULT_DB_NAME; newcfg->db_driverdir = DEFAULT_DB_DRIVER_DIR; newcfg->db_connect_switch = DEFAULT_DB_CONNECT; // return the new server configuration structure. return (void *) newcfg; } /** * A declaration of the configuration directives that are supported by this module. */ static const command_rec mod_open_anonymity_xml_producer_cmds[] = { AP_INIT_TAKE1("XMLFileName",set_xmlFileName,NULL,RSRC_CONF, "OpenAnonymityXMLFileName -- the changed name of openanonymity.xml, ." ), AP_INIT_TAKE1("SearchPatternStop",set_stop_pattern,NULL,RSRC_CONF, "OpenAnonymitySearchPatternStop -- the stop pattern that tells which data to filter." ), AP_INIT_TAKE2("DBLocalize",set_db_localize,NULL,RSRC_CONF, "DBLocalize -- (1)..Name of Host (e.g. localhost) (2) Name of Database." ), AP_INIT_TAKE2("LibDbiControl",set_libdbi_control,NULL,RSRC_CONF, "LibDbiControl -- (1)..Database Type (e.g. pgsql,mysql) (2) Directory to libdbi Driver (e.g. /usr/lib/dbd/)." ), AP_INIT_TAKE2("DbAuthorization",set_db_authorization,NULL,RSRC_CONF, "DbAuthorization -- (1)..Username (2) Password" ), AP_INIT_FLAG("DBConnect",set_db_connect,NULL,RSRC_CONF, "DBConnect Switch on/off usage of Database" ), {NULL} }; /* * Declare and populate the module's data structure. The * name of this structure ('tut1_module') is important - it * must match the name of the module. This structure is the * only "glue" between the httpd core and the module. */ module AP_MODULE_DECLARE_DATA opan_xml_producer_module = { STANDARD20_MODULE_STUFF, // standard stuff; no need to mess with this. 
NULL, // create per-directory configuration structures - we do not. NULL, // merge per-directory - no need to merge if we are not creating anything. create_open_anonymity_xml_producer_config, // create per-server configuration structures. NULL, // merge per-server - hrm - examples I have been reading don't bother with this for trivial cases. mod_open_anonymity_xml_producer_cmds, // configuration directive handlers mod_open_anonymity_xml_producer_register_hooks, // request handlers };