| 
  • If you are a citizen of a European Union member nation, you may not use this service unless you are at least 16 years old.

  • You already know Dokkio is an AI-powered assistant to organize & manage your digital files & messages. Very soon, Dokkio will support Outlook as well as OneDrive. Check it out today!

View
 

source for the broken link auTIMator

Page history last edited by Tim 15 years, 11 months ago
This probably won't compile without a few extra headers (which ones depends on your OS). I assume that the list of files to open is in a file called in.dat, that the output will go into a file called out.dat, and that you are working out of a directory called backup. Additionally, WinZip uses a _ instead of a : in filenames, so this program does the same replacement.
You can download the backup zip file for the site here

// deadlinks.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"

// Removes every occurrence of character `a` from the NUL-terminated string
// `str`, in place, closing the gaps so the remaining characters keep their
// relative order.
//
// str: string to edit in place; a NULL pointer is ignored.
// a:   character to delete.
//
// A read cursor and a write cursor are used so that runs of consecutive
// matches are all dropped and nothing beyond the original terminator is
// ever read.
void str_remove(char *str, char a)
{
        if(!str)
                return;

        char *dst=str;
        for(const char *src=str; *src; ++src)
        {
                if(*src!=a)
                        *dst++=*src;
        }
        *dst=0; // re-terminate at the (possibly shorter) new length
}


// Overwrites, in place, each character of the NUL-terminated string `str`
// that equals `a` with `b`. The string length is not changed by the scan.
void str_replace(char *str, char a, char b)
{
        for(char *cursor=str; *cursor!=0; ++cursor)
        {
                if(*cursor==a)
                        *cursor=b;
        }
}

// One node in a singly linked list of page titles.
// page_title holds the title with every '_' replaced by ':'; next points to
// the following node, or NULL at the end of the list.
struct ref_page {
        // Creates an unlinked node with an empty title.
        ref_page()
        {
                next=NULL;
                page_title[0]=0;
        }

        // Creates an unlinked node from `title`.
        // The copy is bounded: titles longer than the buffer are truncated
        // rather than written past the end of page_title.
        ref_page(const char *title)
        {
                next=NULL;
                strncpy(page_title,title,sizeof(page_title)-1);
                page_title[sizeof(page_title)-1]=0; // strncpy does not terminate on truncation

                str_replace(page_title,'_',':'); // replace _ with : for page titles
        }
        char page_title[1024];
        ref_page *next;
};

// Progress-output switch: statements guarded by `if (screen_output)` print
// their progress messages with printf only when this is non-zero.
const int screen_output=1;

// One distinct link target found while scanning pages, stored as a node in a
// singly linked list.
//
// refcount        number of entries in the referring_page list
// entry_name      link name with every '~' removed
// exists          true if a file for the link could be opened under c:/backup/
// referring_page  head of the list of pages that contain this link
// next            next wiki_link in the list, or NULL
struct wiki_link
{
        // Records link `name`, first seen on page `page`, and probes the
        // backup directory to find out whether the target file exists.
        wiki_link(const char *name, const char *page)
        {
                // Bounded copy: an over-long name is truncated instead of
                // overflowing entry_name.
                strncpy(entry_name,name,sizeof(entry_name)-1);
                entry_name[sizeof(entry_name)-1]=0;
                str_remove(entry_name,'~');

                referring_page = new ref_page(page);
                next = NULL;
                refcount=1;

                // entry_name is at most 1023 characters, so the prefix plus
                // the name always fits in this buffer.
                char filename[2048];
                sprintf(filename,"c:/backup/%s",entry_name);
                if (screen_output) printf("  ** looking for file '%s'...",filename);

                // replace : with _ for filenames; +2 skips the "c:" prefix so
                // the drive colon is left alone
                str_replace(filename+2,':','_');

                FILE *check = fopen(filename,"r");
                if(check!=NULL)
                {
                        exists=true;
                        fclose(check);
                        if (screen_output) printf(" found\n");
                }
                else
                {
                        exists=false;
                        if (screen_output) printf(" not found\n");
                }
        }
        int refcount;
        char entry_name[1024];
        bool exists;
        ref_page *referring_page;
        wiki_link *next;

        // Adds `filename` to the referring_page list unless it is already there.
        void addReferringPage(const char *filename);
};

void wiki_link::addReferringPage(const char *filename)
{
        ref_page *tmp = referring_page;
        ref_page check_ref(filename);
        while(tmp)
        {
                if(!strcmp(check_ref.page_title,tmp->page_title))
                        return; // already exists
                tmp=tmp->next;
        }
        // does not exist
        tmp = new ref_page(filename);
        tmp->next=referring_page;
        referring_page=tmp;
        refcount++;
}

// Head of the global singly linked list of wiki_link entries; NULL while the
// list is empty.
wiki_link *gHead = NULL;


// Searches the global list starting at gHead for an entry whose entry_name
// is exactly equal to `link_name` (strcmp comparison).
// Returns the matching entry, or NULL if the list is empty or has no match.
wiki_link *gFindReference(const char *link_name)
{
        for(wiki_link *node=gHead; node!=NULL; node=node->next)
        {
                if(strcmp(link_name,node->entry_name)==0)
                        return node;
        }
        return NULL;
}

void gAddReference(char *link_name, const char *filename)
{
        if(link_name[0]==0)
                return;

        // modify link_name so that it does not include any '
        str_remove(link_name,'\');

        wiki_link *cur=gFindReference(link_name);

        if (!cur)
        {
                cur=new wiki_link(link_name,filename);
                cur->next=gHead;
                gHead=cur;
        }
        else
                cur->addReferringPage(filename);

}


int _tmain(int argc, _TCHAR* argv[])
{
        FILE *p = fopen("c:/backup/in.dat","r");
        FILE *q = fopen("c:/backup/out.dat","w");
        FILE *s = fopen("c:/backup/deadend.dat","w");

        char page_title[1024];
        while(fgets(page_title,1024,p))
        {
                page_title[strlen(page_title)-1]=0;
                if (screen_output) printf("Reading [%s]...n",page_title);

                char filename[2048];
                sprintf(filename,"c:/backup/%s",page_title);

                char link_name[1024]="";
                int a;
                int curchar=0;
                bool reading=false;
                FILE *r=fopen(filename,"r");
                if(!r)
                {
                        if (screen_output) printf("error: could not open %sn",filename);
                }
                else
                {
                        bool foundlink = false;
                        while((a=fgetc(r))!=-1)
                        {
                                //printf("%c",a);
                                if(a=='[')
                                {
                                        reading=true;
                                        foundlink=true;
                                }
                                else if (reading==true)
                                {
                                        if (a=='[')
                                        {
                                                printf("Error in file %s (link %s)",page_title,link_name);
                                                exit(0);
                                        }
                                        else if (a==']' || a=='|' || a=='#')
                                        {
                                                gAddReference(link_name,page_title);
                                                reading=false;
                                                curchar=0;
                                        }
                                        else
                                        {
                                                link_name[curchar]=a;
                                                link_name[curchar+1]=0;
                                                curchar++;
                                                if(curchar==1023)
                                                {
                                                        printf("link %s in file %s too long!",link_name,page_title);
                                                        exit(0);
                                                }
                                        }
                                }
                        }
                        if(!foundlink)
                                fprintf(s,"[%s]n",page_title);
                        fclose(r);
                }
        }

        // output phase
        wiki_link *cur = gHead;
        FILE *t = fopen("c:/backup/missing_ranked.dat","w");
        printf("Output missing links.n");
        int brokenCount=0;
        while(cur)
        {
                if(!cur->exists && !strchr(cur->entry_name,'@') && !strstr(cur->entry_name,"http:"))
                {
                        fprintf(q,"* [%s]: ",cur->entry_name);
                        fprintf(t,"* %02d [%s]: ",cur->refcount,cur->entry_name);
                        ref_page *page=cur->referring_page;
                        while(page)
                        {
                                fprintf(q,"[%s]. ",page->page_title);
                                fprintf(t,"[%s]. ",page->page_title);
                                page=page->next;
                        }
                        fprintf(q,"n");
                        fprintf(t,"n");
                        brokenCount++;
                }
                cur=cur->next;
        }
        fprintf(q,"''There are currently %d Broken Links listed!''n",brokenCount);
        for(int c='A';c<='Z';c++)
                fprintf(q,"* [%c !!n",c);

        fclose(q);
        q=fopen("c:/backup/everything.dat","w");
        cur = gHead;
        printf("Indexing everything.n");
        while(cur)
        {
                // if(!cur->exists)
                {
                        fprintf(q,"* (%03d references) [%s]: ",cur->refcount,cur->entry_name);
                        ref_page *page=cur->referring_page;
                        while(page)
                        {
                                fprintf(q,"[%s]. ",page->page_title);
                                page=page->next;
                        }
                        fprintf(q,"n");
                }
                cur=cur->next;
        }

        // recheck in.dat for orphaned pages
        fclose(p);
        fclose(q);
        p = fopen("c:/backup/in.dat","r");
        q = fopen("c:/backup/orphan.dat","w");
        printf("Checking for orphans.n");
        while(fgets(page_title,1024,p))
        {
                page_title[strlen(page_title)-1]=0;
                ref_page check_ref(page_title);

                cur = gHead;
                bool found=false;
                while(cur && !found)
                {
                        if(!strcmp(check_ref.page_title,cur->entry_name))
                                found=true;
                        cur=cur->next;
                }
                if(!found)
                        fprintf(q,"[%s]n",check_ref.page_title);
        }

        fcloseall();

        return 0;
}


Comments (0)

You don't have permission to comment on this page.