/*
 * Parcel #6yixxni56e7qw7z
 *
 * Created by Anonymous
 * Public
 *
 * Created April 26, 2025. Expires in 17 days.
 *
 * Loading editor...
 */

#include "detect_dups.h"

// Global variables: file-scope state shared by the nftw callback, add_file,
// print_results and cleanup.

// Hash table (HASH_* macros) keyed by the MD5 digest string; each entry points
// at the FileInfo record for that digest.
static HashEntry *hash_table = NULL;
// Head of the singly linked list of FileInfo records. add_file prepends, so the
// most recently discovered digest is first.
static FileInfo *file_list = NULL;
// Number of distinct digests recorded so far (incremented in add_file).
static int file_count = 0;
// Running counter used to assign SoftLink.link_num when a soft-link group is created.
static int soft_link_count = 0;
// Canonical path of the scanned directory, allocated by realpath() in main and
// freed there; the nftw callback compares resolved paths against it.
static char *base_dir = NULL;

/*
 * Program entry point.
 *
 * Expects exactly one command-line argument: the directory to scan. The
 * directory is validated, canonicalised into the file-scope `base_dir`,
 * walked with nftw() (every entry is handed to render_file_info), and the
 * collected data is then printed and released.
 *
 * Returns EXIT_SUCCESS on completion; every failure path prints a message to
 * stderr and terminates with EXIT_FAILURE.
 */
int main(int argc, char *argv[]) {
    // Exactly one argument (the directory) must be supplied
    if (argc != 2) {
        fprintf(stderr, "Usage: ./detect_dups <directory>\n");
        exit(EXIT_FAILURE);
    }

    const char *root = argv[1];

    // The argument must name an existing directory
    struct stat root_stat;
    int is_directory = (stat(root, &root_stat) == 0) && S_ISDIR(root_stat.st_mode);
    if (!is_directory) {
        // NOTE(review): when stat() succeeds but the path is not a directory,
        // errno was not set by this check and the printed number is whatever
        // value it already held.
        fprintf(stderr, "Error %d: %s is not a valid directory\n", errno, root);
        exit(EXIT_FAILURE);
    }

    // Register the OpenSSL digest algorithms before any hashing happens
    OpenSSL_add_all_digests();

    // Remember the canonical form of the directory; the traversal callback
    // compares resolved paths against it
    base_dir = realpath(root, NULL);
    if (base_dir == NULL) {
        fprintf(stderr, "Error %d: Cannot resolve path for %s\n", errno, root);
        exit(EXIT_FAILURE);
    }

    // Traverse the tree without following symbolic links (FTW_PHYS), using
    // at most 20 simultaneously open descriptors
    const int max_open_fds = 20;
    int walk_status = nftw(root, render_file_info, max_open_fds, FTW_PHYS);
    if (walk_status == -1) {
        fprintf(stderr, "Error %d: Failed to traverse directory %s\n", errno, root);
        free(base_dir);
        exit(EXIT_FAILURE);
    }

    // Report what was found, then release everything that was allocated
    print_results();
    cleanup();
    free(base_dir);

    return EXIT_SUCCESS;
}

/*
 * nftw() callback: process each entry found during the directory walk.
 *
 * Regular files (FTW_F) and symbolic links (FTW_SL) whose resolved path lies
 * inside `base_dir` are hashed with calculate_md5() and recorded through
 * add_file(). Every other entry type, and anything that cannot be resolved or
 * hashed, is silently skipped.
 *
 * Parameters:
 *   fpath  - path of the visited entry as supplied by nftw
 *   sb     - stat data supplied by nftw for the entry
 *   tflag  - nftw type flag for the entry
 *   ftwbuf - unused
 *
 * Returns 0 in every case, so the walk is never aborted by this callback.
 */
static int render_file_info(const char *fpath, const struct stat *sb, int tflag, struct FTW *ftwbuf) {
    (void)ftwbuf;

    // Skip if not a regular file or symbolic link
    if (tflag != FTW_F && tflag != FTW_SL) {
        return 0;
    }

    // Resolve to a canonical absolute path (for a symlink this is its target)
    char *real_path = realpath(fpath, NULL);
    if (!real_path) {
        return 0; // Skip if the path cannot be resolved (e.g. dangling link)
    }

    // The resolved path must be base_dir itself or live underneath it. A bare
    // prefix comparison is not enough: "/a/bc/x" shares the prefix "/a/b" but
    // is outside "/a/b". Require the prefix to end on a path-component
    // boundary; a base_dir that already ends in '/' (the root directory)
    // carries its own boundary.
    size_t base_len = strlen(base_dir);
    int inside_base =
        strncmp(real_path, base_dir, base_len) == 0 &&
        (real_path[base_len] == '\0' ||
         real_path[base_len] == '/' ||
         (base_len > 0 && base_dir[base_len - 1] == '/'));
    free(real_path);
    if (!inside_base) {
        return 0; // Skip if outside base directory
    }

    // Process regular files
    if (tflag == FTW_F) {
        char *md5_hash = calculate_md5(fpath);
        if (md5_hash) {
            add_file(fpath, sb, md5_hash);
            free(md5_hash);
        }
    }
    // Process symbolic links
    else if (tflag == FTW_SL) {
        // stat() follows the link, so this only confirms the target exists.
        // NOTE(review): link_sb (the target's metadata) is not used further;
        // add_file receives a copy of `sb`, so the inode it sees is whatever
        // nftw reported for this entry - confirm that is the intended one.
        struct stat link_sb;
        if (stat(fpath, &link_sb) == 0) {
            // fopen() in calculate_md5 follows the link, so the digest is
            // that of the target's contents
            char *md5_hash = calculate_md5(fpath);
            if (md5_hash) {
                // Mark as a soft link by setting a flag in the stat structure
                // This is a hack, but it works for our purpose
                struct stat modified_sb = *sb;
                modified_sb.st_mode |= S_IFLNK;
                add_file(fpath, &modified_sb, md5_hash);
                free(md5_hash);
            }
        }
    }

    return 0;
}

/*
 * Calculate the MD5 digest of a file's contents.
 *
 * Parameters:
 *   filename - path of the file to read (opened in binary mode)
 *
 * Returns a newly malloc'd, NUL-terminated lowercase hexadecimal string that
 * the caller must free(), or NULL when the file cannot be opened or read,
 * when any OpenSSL digest call fails, or when memory cannot be allocated.
 */
char* calculate_md5(const char *filename) {
    unsigned char buffer[4096];
    unsigned char md_value[EVP_MAX_MD_SIZE];
    unsigned int md_len = 0;
    size_t bytes;
    char *md5_string = NULL;
    EVP_MD_CTX *mdctx = NULL;

    FILE *file = fopen(filename, "rb");
    if (!file) {
        return NULL;
    }

    mdctx = EVP_MD_CTX_new();
    if (!mdctx) {
        goto out_close;
    }

    if (EVP_DigestInit_ex(mdctx, EVP_md5(), NULL) != 1) {
        goto out_ctx;
    }

    while ((bytes = fread(buffer, 1, sizeof(buffer), file)) != 0) {
        if (EVP_DigestUpdate(mdctx, buffer, bytes) != 1) {
            goto out_ctx;
        }
    }

    // fread() returns 0 both at end-of-file and on error. Without this check
    // an unreadable stream (for instance a directory reached through a
    // symbolic link) would be reported with the digest of empty input.
    if (ferror(file)) {
        goto out_ctx;
    }

    if (EVP_DigestFinal_ex(mdctx, md_value, &md_len) != 1) {
        goto out_ctx;
    }

    // Two hex characters per digest byte plus the terminator (33 for MD5)
    md5_string = malloc((size_t)md_len * 2 + 1);
    if (md5_string) {
        for (unsigned int i = 0; i < md_len; i++) {
            snprintf(&md5_string[i * 2], 3, "%02x", md_value[i]);
        }
        md5_string[(size_t)md_len * 2] = '\0';
    }

out_ctx:
    EVP_MD_CTX_free(mdctx);
out_close:
    fclose(file);
    return md5_string;
}

// Allocate a PathNode holding a private copy of `path`, linked in front of
// `next`. Returns NULL, with nothing leaked, if either allocation fails.
static PathNode *path_node_create(const char *path, PathNode *next) {
    PathNode *node = malloc(sizeof *node);
    if (!node) {
        return NULL;
    }

    node->path = strdup(path);
    if (!node->path) {
        free(node);
        return NULL;
    }

    node->next = next;
    return node;
}

// Allocate a SoftLink for `inode` containing the single path `path`, linked in
// front of `next`. The global soft_link_count is advanced, and used as the
// link number, only when every allocation succeeded. Returns NULL on failure.
static SoftLink *soft_link_create(const char *path, ino_t inode, SoftLink *next) {
    PathNode *first_path = path_node_create(path, NULL);
    if (!first_path) {
        return NULL;
    }

    SoftLink *link = malloc(sizeof *link);
    if (!link) {
        free(first_path->path);
        free(first_path);
        return NULL;
    }

    link->inode = inode;
    link->count = 1;
    link->link_num = ++soft_link_count;
    link->paths = first_path;
    link->next = next;
    return link;
}

// Return the HardLink in the list starting at `head` whose inode equals
// `inode`, or NULL when there is none.
static HardLink *hard_link_find(HardLink *head, ino_t inode) {
    while (head && head->inode != inode) {
        head = head->next;
    }
    return head;
}

/*
 * Add a file to the appropriate data structures.
 *
 * The digest is looked up in `hash_table`; an unknown digest gets a new
 * HashEntry and a new FileInfo (prepended to `file_list`). The path is then
 * filed under that FileInfo:
 *   - when S_ISLNK(sb->st_mode) holds, as a soft-link path beneath the
 *     HardLink whose inode equals sb->st_ino; if no such HardLink exists a
 *     placeholder with count 0 and no paths is created to carry it;
 *   - otherwise as a hard-link path, grouped by sb->st_ino.
 * New nodes are always prepended to their lists.
 *
 * Parameters:
 *   path     - path to record; it is copied, so the caller keeps ownership
 *   sb       - stat data supplying the inode and the file-type bits
 *   md5_hash - NUL-terminated digest string; it is copied into the records
 *
 * On any allocation failure (including the path copy) the path is dropped and
 * no partially built node is linked in.
 */
void add_file(const char *path, const struct stat *sb, char *md5_hash) {
    // Look up the hash in the hash table
    HashEntry *hash_entry;
    HASH_FIND_STR(hash_table, md5_hash, hash_entry);

    if (!hash_entry) {
        // Create a new hash entry if it doesn't exist
        hash_entry = malloc(sizeof *hash_entry);
        if (!hash_entry) return;

        strcpy(hash_entry->md5_hash, md5_hash);

        // Create a new file info
        FileInfo *new_info = malloc(sizeof *new_info);
        if (!new_info) {
            free(hash_entry);
            return;
        }

        strcpy(new_info->md5_hash, md5_hash);
        new_info->hard_links = NULL;
        new_info->next = file_list;
        file_list = new_info;

        hash_entry->file_info = new_info;
        HASH_ADD_STR(hash_table, md5_hash, hash_entry);

        file_count++;
    }

    FileInfo *file_info = hash_entry->file_info;
    HardLink *hard_link = hard_link_find(file_info->hard_links, sb->st_ino);

    if (S_ISLNK(sb->st_mode)) {
        if (hard_link) {
            // Find an existing soft-link group for this inode
            SoftLink *soft_link = hard_link->soft_links;
            while (soft_link && soft_link->inode != sb->st_ino) {
                soft_link = soft_link->next;
            }

            if (soft_link) {
                // Add path to existing soft link
                PathNode *node = path_node_create(path, soft_link->paths);
                if (!node) return;

                soft_link->paths = node;
                soft_link->count++;
                return;
            }

            // Create a new soft link under the existing hard link
            SoftLink *created = soft_link_create(path, sb->st_ino, hard_link->soft_links);
            if (!created) return;

            hard_link->soft_links = created;
            return;
        }

        // No hard link with this inode yet: create a placeholder hard link
        // (no paths, count 0) that carries the new soft link
        HardLink *placeholder = malloc(sizeof *placeholder);
        if (!placeholder) return;

        SoftLink *created = soft_link_create(path, sb->st_ino, NULL);
        if (!created) {
            free(placeholder);
            return;
        }

        placeholder->inode = sb->st_ino;
        placeholder->count = 0;
        placeholder->paths = NULL;
        placeholder->soft_links = created;
        placeholder->next = file_info->hard_links;
        file_info->hard_links = placeholder;
        return;
    }

    // This is a regular file (hard link)
    if (hard_link) {
        // Add path to existing hard link
        PathNode *node = path_node_create(path, hard_link->paths);
        if (!node) return;

        hard_link->paths = node;
        hard_link->count++;
        return;
    }

    // Create a new hard link
    HardLink *new_hard_link = malloc(sizeof *new_hard_link);
    if (!new_hard_link) return;

    PathNode *first_path = path_node_create(path, NULL);
    if (!first_path) {
        free(new_hard_link);
        return;
    }

    new_hard_link->inode = sb->st_ino;
    new_hard_link->count = 1;
    new_hard_link->soft_links = NULL;
    new_hard_link->paths = first_path;
    new_hard_link->next = file_info->hard_links;
    file_info->hard_links = new_hard_link;
}

// Reverse a singly linked PathNode list in place and return its new head.
static PathNode *reverse_path_list(PathNode *head) {
    PathNode *reversed = NULL;
    while (head) {
        PathNode *next = head->next;
        head->next = reversed;
        reversed = head;
        head = next;
    }
    return reversed;
}

// Print every path of the list at *head in insertion order (the list itself is
// stored newest-first). The first path is written directly after whatever is
// already on the current output line; each later path goes on its own line,
// prefixed with `continuation_indent`. An empty list prints nothing. The list
// is reversed for printing and restored before returning, so no scratch array
// is needed.
static void print_paths_in_insertion_order(PathNode **head, const char *continuation_indent) {
    PathNode *oldest = reverse_path_list(*head);

    for (PathNode *node = oldest; node; node = node->next) {
        if (node == oldest) {
            printf("%s\n", node->path);
        } else {
            printf("%s%s\n", continuation_indent, node->path);
        }
    }

    *head = reverse_path_list(oldest);
}

/*
 * Print the results.
 *
 * Walks `file_list` and writes to stdout, for each record: a "File N" heading,
 * its MD5 hash, each hard-link group with its count, inode and paths, and
 * under each hard-link group every soft-link group with its number, count,
 * inode and paths. Paths are listed in the order they were added.
 *
 * The path lists are printed without a variable-length array: the previous
 * array was sized by the group's count, which is 0 for the placeholder hard
 * links created for symbolic links, and a zero-length VLA is undefined
 * behaviour.
 */
void print_results() {
    int file_num = 1;

    for (FileInfo *file_info = file_list; file_info; file_info = file_info->next) {
        printf("File %d\n", file_num++);
        printf("\tMD5 Hash: %s\n", file_info->md5_hash);

        for (HardLink *hard_link = file_info->hard_links; hard_link; hard_link = hard_link->next) {
            printf("\t\tHard Link (%d): %lu\n", hard_link->count, (unsigned long)hard_link->inode);
            printf("\t\t\tPaths:\t");
            print_paths_in_insertion_order(&hard_link->paths, "\t\t\t\t");

            // Print soft links
            for (SoftLink *soft_link = hard_link->soft_links; soft_link; soft_link = soft_link->next) {
                printf("\t\t\tSoft Link %d(%d): %lu\n",
                       soft_link->link_num, soft_link->count,
                       (unsigned long)soft_link->inode);
                printf("\t\t\t\tPaths:\t");
                print_paths_in_insertion_order(&soft_link->paths, "\t\t\t\t\t");
            }
        }
    }
}

// Free memory
void cleanup() {
    // Free the hash table
    HashEntry *current_hash, *tmp_hash;
    HASH_ITER(hh, hash_table, current_hash, tmp_hash) {
        HASH_DEL(hash_table, current_hash);
        free(current_hash);
    }
    
    // Free the file list
    while (file_list) {
        FileInfo *next_file = file_list->next;
        
        // Free hard links
        HardLink *hard_link = file_list->hard_links;
        while (hard_link) {
            HardLink *next_hard_link = hard_link->next;
            
            // Free paths
            PathNode *path = hard_link->paths;
            while (path) {
                PathNode *next_path = path->next;
                free(path->path);
                free(path);
                path = next_path;
            }
            
            // Free soft links
            SoftLink *soft_link = hard_link->soft_links;
            while (soft_link) {
                SoftLink *next_soft_link = soft_link->next;
                
                // Free paths
                PathNode *soft_path = soft_link->paths;
                while (soft_path) {
                    PathNode *next_soft_path = soft_path->next;
                    free(soft_path->path);
                    free(soft_path);
                    soft_path = next_soft_path;
                }
                
                free(soft_link);
                soft_link = next_soft_link;
            }
            
            free(hard_link);
            hard_link = next_hard_link;
        }
        
        free(file_list);
        file_list = next_file;
    }
    
    // Clean up OpenSSL
    EVP_cleanup();
}