src/FileAttributeFinder.h
author František Kučera <franta-hg@frantovo.cz>
Fri, 26 Jul 2019 22:50:35 +0200
branch v_0
changeset 22 31e7f1994b12
parent 21 1252acdc5a5a
child 24 4353cd19a6b5
permissions -rw-r--r--
reading file content (with unicode support)

/**
 * Relational pipes
 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <sys/stat.h>
#include <pwd.h>
#include <grp.h>

#include <string>
#include <vector>
#include <filesystem>
#include <fstream>
#include <sstream>
#include <locale>
#include <codecvt>
#include <stdexcept>
#include <typeinfo>

#include <relpipe/writer/typedefs.h>
#include <relpipe/writer/AttributeMetadata.h>
#include <relpipe/writer/RelationalWriter.h>

#include "RequestedField.h"
#include "AttributeFinder.h"

namespace relpipe {
namespace in {
namespace filesystem {

namespace fs = std::filesystem;
using namespace relpipe::writer;

class FileAttributeFinder : public AttributeFinder {
private:
	std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings.

	string_t currentOwner;
	string_t currentGroup;

	string_t getType(const fs::path& file, bool workWithSymlinkItself) {
		// TODO: Use whole words? (letters are compatible with find -type)
		if (fs::is_symlink(file) && workWithSymlinkItself) return L"l"; // symlinks are both symlinks and files/directories/etc.
		else if (fs::is_regular_file(file)) return L"f";
		else if (fs::is_directory(file)) return L"d";
		else if (fs::is_fifo(file)) return L"p";
		else if (fs::is_socket(file)) return L"s";
		else if (fs::is_block_file(file)) return L"b";
		else if (fs::is_character_file(file)) return L"c";
		else return L"o";
	}

	void fetchOwner(const fs::path& file, string_t& owner, string_t& group) {
		// TODO: throw exception on error
		// TODO: get user and group in C++ way?
		struct stat info;
		stat(file.c_str(), &info);
		/**
		 * The return value may point to a static area, and may  be
		 * overwritten  by  subsequent calls to getpwent(3), getpw‐
		 * nam(), or getpwuid().  (Do not pass the returned pointer
		 * to free(3).)
		 */
		struct passwd* pw = getpwuid(info.st_uid);
		struct group* gr = getgrgid(info.st_gid);
		owner = convertor.from_bytes(pw->pw_name);
		group = convertor.from_bytes(gr->gr_name);
	}

	string_t getContent(const fs::path& file) {
		try {
			ifstream input(file);
			std::stringstream bytes;
			bytes << input.rdbuf();

			// TODO: optional whitespace trimming or substring
			// TODO: custom encoding + read encoding from xattr
			return convertor.from_bytes(bytes.str());
		} catch (const std::range_error& e) {
			// TODO: allow custom error value or fallback to HEX/Base64
			return L"";
		}
	}

protected:

	virtual void writeFieldOfExistingFile(RelationalWriter* writer, const RequestedField& field) override {
		if (field.group == RequestedField::GROUP_FILE) {
			for (string_t alias : field.getAliases()) {
				if (field.name == FIELD_NAME) {
					writer->writeAttribute(currentFile.filename().wstring());
				} else if (field.name == FIELD_PATH_ORIGINAL) {
					writer->writeAttribute(convertor.from_bytes(currentFileRaw));
				} else if (field.name == FIELD_PATH_ABSOLUTE) {
					writer->writeAttribute(fs::absolute(currentFile).wstring());
				} else if (field.name == FIELD_PATH_CANONICAL) {
					writer->writeAttribute(fs::canonical(currentFile).wstring());
				} else if (field.name == FIELD_TYPE) {
					writer->writeAttribute(getType(currentFile, true));
				} else if (field.name == FIELD_SYMLINK_TARGET_TYPE) {
					writer->writeAttribute(getType(currentFile, false));
				} else if (field.name == FIELD_SYMLINK_TARGET) {
					string_t symlinkTarget; // TODO: null value (when supported)
					if (fs::is_symlink(currentFile)) {
						try {
							symlinkTarget = fs::read_symlink(currentFile).wstring();
						} catch (...) {
							// missing permissions, not readable → leave empty
						}
					}
					writer->writeAttribute(symlinkTarget);
				} else if (field.name == FIELD_SIZE) {
					integer_t size = fs::is_regular_file(currentFile) ? fs::file_size(currentFile) : 0;
					writer->writeAttribute(&size, typeid (size));
				} else if (field.name == FIELD_OWNER) {
					if (currentOwner.empty()) fetchOwner(currentFile, currentOwner, currentGroup);
					writer->writeAttribute(currentOwner);
				} else if (field.name == FIELD_GROUP) {
					if (currentOwner.empty()) fetchOwner(currentFile, currentOwner, currentGroup);
					writer->writeAttribute(currentGroup);
				} else if (field.name == FIELD_CONTENT) {
					writer->writeAttribute(getContent(currentFile));
				} else {
					throw RelpipeWriterException(L"Unsupported field name in FileAttributeFinder: " + field.name);
				}
			}
		}
	}

public:

	static const string_t FIELD_PATH_ORIGINAL;
	static const string_t FIELD_NAME;
	static const string_t FIELD_SIZE;
	static const string_t FIELD_PATH_ABSOLUTE;
	static const string_t FIELD_PATH_CANONICAL;
	static const string_t FIELD_TYPE;
	static const string_t FIELD_SYMLINK_TARGET_TYPE;
	static const string_t FIELD_SYMLINK_TARGET;
	static const string_t FIELD_OWNER;
	static const string_t FIELD_GROUP;
	static const string_t FIELD_CONTENT;

	virtual vector<AttributeMetadata> toMetadata(const RequestedField& field) override {
		if (field.group == RequestedField::GROUP_FILE) {
			vector<AttributeMetadata> metadata;
			for (string_t alias : field.getAliases()) {
				if (field.name == FIELD_SIZE) metadata.push_back(AttributeMetadata{alias, TypeId::INTEGER});
				else metadata.push_back(AttributeMetadata{alias, TypeId::STRING});
			}
			return metadata;
		} else {
			return {};
		}
	}

	void writeField(RelationalWriter* writer, const RequestedField& field) override {
		if (currentFileExists
				|| field.name == FIELD_PATH_ORIGINAL
				|| (fs::is_symlink(currentFile) && (field.name == FIELD_SYMLINK_TARGET || field.name == FIELD_TYPE)))
			writeFieldOfExistingFile(writer, field);
		else
			writeEmptyField(writer, field);
	}

	void endFile() override {
		AttributeFinder::endFile();
		currentOwner.clear();
		currentGroup.clear();
	};

	virtual ~FileAttributeFinder() override {
	}
};

// Names of the fields supported by FileAttributeFinder.
// Defined as inline variables (C++17), because this is a header:
// a non-inline definition would be emitted in every translation unit
// that includes it and would break the one-definition rule at link time.
inline const string_t FileAttributeFinder::FIELD_PATH_ORIGINAL = L"path";
inline const string_t FileAttributeFinder::FIELD_NAME = L"name";
inline const string_t FileAttributeFinder::FIELD_SIZE = L"size";
inline const string_t FileAttributeFinder::FIELD_PATH_ABSOLUTE = L"path_absolute";
inline const string_t FileAttributeFinder::FIELD_PATH_CANONICAL = L"path_canonical";
inline const string_t FileAttributeFinder::FIELD_TYPE = L"type";
inline const string_t FileAttributeFinder::FIELD_SYMLINK_TARGET_TYPE = L"symlink_target_type";
inline const string_t FileAttributeFinder::FIELD_SYMLINK_TARGET = L"symlink_target";
inline const string_t FileAttributeFinder::FIELD_OWNER = L"owner";
inline const string_t FileAttributeFinder::FIELD_GROUP = L"group";
inline const string_t FileAttributeFinder::FIELD_CONTENT = L"content";

}
}
}